Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FLINK-16973][tests] Add tooling for collecting jvm crash files #11638

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ resources:
# Container with Maven 3.2.5, SSL to have the same environment everywhere.
- container: flink-build-container
image: rmetzger/flink-ci:ubuntu-amd64-3528acd
# On AZP provided machines, set this flag to allow writing coredumps in docker
options: --privileged

# Define variables:
# - See tools/azure-pipelines/jobs-template.yml for a short summary of the caching
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ private static boolean isZookeeperRunning(final Path kafkaDir) {
private static boolean isKafkaRunning(final Path kafkaDir) throws IOException {
try {
final AtomicBoolean atomicBrokerStarted = new AtomicBoolean(false);
queryBrokerStatus(kafkaDir, line -> atomicBrokerStarted.compareAndSet(false, !line.contains("Node does not exist")));
queryBrokerStatus(kafkaDir, line -> atomicBrokerStarted.compareAndSet(false, line.contains("dataLength =")));
return atomicBrokerStarted.get();
} catch (final IOException ioe) {
// we get an exception if zookeeper isn't running
Expand Down
3 changes: 3 additions & 0 deletions flink-end-to-end-tests/test-scripts/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,7 @@ function check_logs_for_errors {
| grep -v "Error while loading kafka-version.properties :null" \
| grep -v "Failed Elasticsearch item request" \
| grep -v "[Terror] modules" \
| grep -v "HeapDumpOnOutOfMemoryError" \
| grep -ic "error" || true)
if [[ ${error_count} -gt 0 ]]; then
echo "Found error in log files:"
Expand Down Expand Up @@ -401,12 +402,14 @@ function check_logs_for_exceptions {
function check_logs_for_non_empty_out_files {
echo "Checking for non-empty .out files..."
# exclude reflective access warnings as these are expected (and currently unavoidable) on Java 9
# exclude message about JAVA_TOOL_OPTIONS being set (https://bugs.openjdk.java.net/browse/JDK-8039152)
if grep -ri -v \
-e "WARNING: An illegal reflective access" \
-e "WARNING: Illegal reflective access"\
-e "WARNING: Please consider reporting"\
-e "WARNING: Use --illegal-access"\
-e "WARNING: All illegal access"\
-e "Picked up JAVA_TOOL_OPTIONS"\
$FLINK_DIR/log/*.out\
| grep "." \
> /dev/null; then
Expand Down
2 changes: 2 additions & 0 deletions tools/azure-pipelines/jobs-template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ jobs:
echo "##vso[task.setvariable variable=PATH]$JAVA_HOME_11_X64/bin:$PATH"
displayName: "Set to jdk11"
condition: eq('${{parameters.jdk}}', 'jdk11')
- script: sudo sysctl -w kernel.core_pattern=core.%p
displayName: Set coredump pattern
# Test
- script: STAGE=test ${{parameters.environment}} ./tools/azure_controller.sh $(module)
displayName: Test - $(module)
Expand Down
10 changes: 10 additions & 0 deletions tools/ci/maven-utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,13 @@ function setup_maven {

echo "Installed Maven ${MAVEN_VERSION} to ${M2_HOME}"
}

# Moves JVM crash artifacts found below a directory into a target directory.
#
# Files are matched case-insensitively against the full path: heap dumps
# (*.hprof), *.dump / *.dumpstream files, logs whose path contains "hs" and
# ends in ".log", and core files named "core" optionally followed by one
# character and digits (e.g. core.1234).
#
# Arguments:
#   $1 - directory to search recursively
#   $2 - directory the matching files are moved into
# Outputs:
#   one line per moved file on stdout
function collect_coredumps {
	local SEARCHDIR=$1
	local TARGET_DIR=$2
	local file
	echo "Searching for .dump, .dumpstream and related files in '$SEARCHDIR'"
	# Read NUL-delimited paths so that names containing spaces, newlines or
	# glob characters are handled as a single file instead of being word-split.
	while IFS= read -r -d '' file; do
		echo "Moving '$file' to target directory ('$TARGET_DIR')"
		# "--" keeps a path starting with "-" from being parsed as an option.
		mv -- "$file" "$TARGET_DIR/"
	done < <(find "$SEARCHDIR" -type f -regextype posix-extended -iregex '.*\.hprof|.*\.dump|.*\.dumpstream|.*hs.*\.log|.*/core(.[0-9]+)?$' -print0)
}
6 changes: 6 additions & 0 deletions tools/travis_watchdog.sh
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ if [ ! -z "$TF_BUILD" ] ; then
ARTIFACTS_FILE=${BUILD_BUILDNUMBER}.tar.gz
fi

# enable coredumps and have every JVM write a heap dump on OutOfMemoryError
ulimit -c unlimited
export JAVA_TOOL_OPTIONS="-XX:+HeapDumpOnOutOfMemoryError"

if [ $TEST == $STAGE_PYTHON ]; then
CMD=$PYTHON_TEST
CMD_PID=$PYTHON_PID
Expand Down Expand Up @@ -276,6 +280,8 @@ case $TEST in
;;
esac

collect_coredumps `pwd` $ARTIFACTS_DIR

upload_artifacts_s3

# since we are in flink/tools/artifacts
Expand Down