Skip to content
Permalink
Browse files
[CI] Dump stacktrace when a job is cancelled
### Motivation

Sometimes CI jobs fail due to a timeout. It would be useful to understand what the last test was doing before it was interrupted.

### Changes

* Added a new script for dumping stacktrace.
* Added a step to every job that runs the script when `cancelled()` is true.


Reviewers: Andrey Yegorov <None>

This closes #3042 from nicoloboschi/ci-thread-dump
  • Loading branch information
nicoloboschi committed Feb 15, 2022
1 parent f19544b commit 80a110096368b54831eb0302bcdfb5b9555f86bc
Show file tree
Hide file tree
Showing 11 changed files with 94 additions and 2 deletions.
@@ -50,4 +50,8 @@ jobs:
java-version: 1.8

- name: Run bookie test
run: ./gradlew bookkeeper-server:test --tests="org.apache.bookkeeper.bookie.*" ${GRADLE_ARGS}
run: ./gradlew bookkeeper-server:test --tests="org.apache.bookkeeper.bookie.*" ${GRADLE_ARGS}

- name: print JVM thread dumps when cancelled
if: cancelled()
run: ./dev/ci-tool print_thread_dumps
@@ -50,3 +50,7 @@ jobs:
java-version: 1.8
- name: Run client tests
run: ./gradlew bookkeeper-server:test --tests="org.apache.bookkeeper.client.*" ${GRADLE_ARGS}

- name: print JVM thread dumps when cancelled
if: cancelled()
run: ./dev/ci-tool print_thread_dumps
@@ -51,3 +51,6 @@ jobs:
- name: Build with gradle
run: |
./gradlew test -x bookkeeper-server:test -x tests:integration:cluster:test -x tests:integration:smoke:test -x tests:integration:standalone:test -PexcludeTests="**/distributedlog/**, **/statelib/**, **/clients/**, **/*common/**, **/stream/**, **/stream/*bk*/**, **/*backward*/**" ${GRADLE_ARGS}
- name: print JVM thread dumps when cancelled
if: cancelled()
run: ./dev/ci-tool print_thread_dumps
@@ -51,3 +51,7 @@ jobs:
- name: Build with gradle
run: |
./gradlew test -x bookkeeper-server:test -x tests:integration:cluster:test -x tests:integration:smoke:test -x tests:integration:standalone:test -PexcludeTests="**/distributedlog/**, **/statelib/**, **/clients/**, **/*common/**, **/stream/**, **/stream/*bk*/**, **/*backward*/**" ${GRADLE_ARGS}
- name: print JVM thread dumps when cancelled
if: cancelled()
run: ./dev/ci-tool print_thread_dumps
@@ -57,3 +57,7 @@ jobs:
run: ./gradlew tests:integration:smoke:test ${GRADLE_ARGS}
- name: run standalone test
run: ./gradlew tests:integration:standalone:test ${GRADLE_ARGS}

- name: print JVM thread dumps when cancelled
if: cancelled()
run: ./dev/ci-tool print_thread_dumps
@@ -50,4 +50,8 @@ jobs:
with:
java-version: 1.8
- name: Run remaining tests
run: ./gradlew bookkeeper-server:test -PexcludeTests="*org.apache.bookkeeper.bookie.*, *org.apache.bookkeeper.client.*, *org.apache.bookkeeper.replication.*, *org.apache.bookkeeper.tls.*" ${GRADLE_ARGS}
run: ./gradlew bookkeeper-server:test -PexcludeTests="*org.apache.bookkeeper.bookie.*, *org.apache.bookkeeper.client.*, *org.apache.bookkeeper.replication.*, *org.apache.bookkeeper.tls.*" ${GRADLE_ARGS}

- name: print JVM thread dumps when cancelled
if: cancelled()
run: ./dev/ci-tool print_thread_dumps
@@ -50,3 +50,7 @@ jobs:
java-version: 1.8
- name: Run replication tests
run: ./gradlew bookkeeper-server:test --tests="org.apache.bookkeeper.replication.*" ${GRADLE_ARGS}

- name: print JVM thread dumps when cancelled
if: cancelled()
run: ./dev/ci-tool print_thread_dumps
@@ -64,3 +64,7 @@ jobs:
- name: Run stream:storage:impl tests
run: ./gradlew stream:storage:impl:test ${GRADLE_ARGS}

- name: print JVM thread dumps when cancelled
if: cancelled()
run: ./dev/ci-tool print_thread_dumps

@@ -50,3 +50,7 @@ jobs:
java-version: 1.8
- name: Run tls tests
run: ./gradlew bookkeeper-server:test --tests="org.apache.bookkeeper.tls.*" ${GRADLE_ARGS}

- name: print JVM thread dumps when cancelled
if: cancelled()
run: ./dev/ci-tool print_thread_dumps
@@ -315,6 +315,7 @@ allprojects {
}
dependencies {
implementation(enforcedPlatform(depLibs.nettyBom))
testImplementation depLibs.log4jSlf4jImpl
}
tasks.register('writeClasspath') {
doLast {
@@ -0,0 +1,56 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

# shell function library for Bookkeeper CI builds

# Lists all available functions in this tool.
# Prints, one per line and sorted, the name of every defined shell function
# whose name starts with "ci_", with that prefix stripped.
function ci_list_functions() {
  # `declare -F` prints one "declare -f <name>" line per function; the last
  # field is the function name.
  # `grep -E` is used instead of the obsolescent `egrep`, which emits a
  # warning on stderr with recent GNU grep releases.
  declare -F | awk '{print $NF}' | sort | grep -E '^ci_' | sed 's/^ci_//'
}

# Prints thread dumps for all running JVMs.
# Used in CI when a job gets cancelled because of a job timeout.
# For every pid reported by `jps`, prints a header line, the process command
# line taken from /proc (when it is readable), then the output of
# `jcmd <pid> Thread.print -l` and `jcmd <pid> GC.heap_info`.
# Always returns 0: this is a best-effort diagnostic and must not fail the
# CI step that invokes it.
function ci_print_thread_dumps() {
  # Keep the loop variable out of the caller's (global) scope.
  local java_pid
  for java_pid in $(jps -q -J-XX:+PerfDisableSharedMem); do
    echo "----------------------- pid $java_pid -----------------------"
    # /proc/<pid>/cmdline is NUL-separated, hence `xargs -0`. The file may be
    # missing if the JVM exited after jps ran or if /proc is unavailable.
    if [[ -r "/proc/$java_pid/cmdline" ]]; then
      xargs -0 echo < "/proc/$java_pid/cmdline"
    fi
    jcmd "$java_pid" Thread.print -l
    jcmd "$java_pid" GC.heap_info
  done
  return 0
}

# Entry point: the first argument names the tool function to run, without its
# "ci_" prefix; any remaining arguments are forwarded to that function.
# With no argument, or with a name that is not a defined function, prints the
# available functions and exits with status 1.
if [ -z "$1" ]; then
  echo "usage: $0 [ci_tool_function_name]"
  echo "Available ci tool functions:"
  ci_list_functions
  exit 1
fi
ci_function_name="ci_$1"
shift

if [[ "$(LC_ALL=C type -t "$ci_function_name")" == "function" ]]; then
  # Invoke the function directly rather than through `eval`: eval would
  # re-parse the forwarded arguments, word-splitting them and executing any
  # command substitutions they contain.
  "$ci_function_name" "$@"
else
  echo "Invalid ci tool function"
  echo "Available ci tool functions:"
  ci_list_functions
  exit 1
fi

0 comments on commit 80a1100

Please sign in to comment.