From 41a60d50d3afa83abec5d50a41eecacd20f29120 Mon Sep 17 00:00:00 2001 From: Andrew Thompson Date: Tue, 29 Mar 2011 16:12:59 -0400 Subject: [PATCH 1/2] Propagate error codes from nodetool & to_erl in the init script to the user --- rel/files/riak | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/rel/files/riak b/rel/files/riak index 91c8d5806..1adc79dce 100755 --- a/rel/files/riak +++ b/rel/files/riak @@ -102,6 +102,10 @@ case "$1" in ;; esac $NODETOOL stop + ES=$? + if [ "$ES" -ne 0 ]; then + exit $ES + fi while `kill -0 $PID 2>/dev/null`; do sleep 1 @@ -111,28 +115,41 @@ case "$1" in restart) ## Restart the VM without exiting the process $NODETOOL restart + ES=$? + if [ "$ES" -ne 0 ]; then + exit $ES + fi ;; reboot) ## Restart the VM completely (uses heart to restart it) $NODETOOL reboot + ES=$? + if [ "$ES" -ne 0 ]; then + exit $ES + fi ;; ping) ## See if the VM is alive $NODETOOL ping + ES=$? + if [ "$ES" -ne 0 ]; then + exit $ES + fi ;; attach) # Make sure a node IS running RES=`$NODETOOL ping` - if [ "$RES" != "pong" ]; then + ES=$? + if [ "$ES" -ne 0 ]; then echo "Node is not running!" - exit 1 + exit $ES fi shift - $ERTS_PATH/to_erl $PIPE_DIR + exec $ERTS_PATH/to_erl $PIPE_DIR ;; console) From 830ef26723527ac33f790f62ba3b9739ebaa384c Mon Sep 17 00:00:00 2001 From: Andrew Thompson Date: Tue, 29 Mar 2011 16:15:07 -0400 Subject: [PATCH 2/2] Wait for the application to actually come up in 'riak start' This allows you to see if riak is instantly crashing on startup instead of silently failing. --- rel/files/riak | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/rel/files/riak b/rel/files/riak index 1adc79dce..406d4479f 100755 --- a/rel/files/riak +++ b/rel/files/riak @@ -78,6 +78,30 @@ case "$1" in mkdir -p $PIPE_DIR $ERTS_PATH/run_erl -daemon $PIPE_DIR/ $RUNNER_LOG_DIR \ "exec $RUNNER_SCRIPT_DIR/$SCRIPT console" 2>&1 + + # Wait for the node to come up. 
We can't just ping it because + # distributed erlang comes up for a second before riak crashes + # (e.g. in the case of an unwritable disk). Once the node comes + # up we check for the node watcher process. If that's running + # then we assume things are good enough. This will at least let + # the user know when riak is crashing right after startup. + WAIT=${WAIT_FOR_ERLANG:-15} + while [ "$WAIT" -gt 0 ]; do + WAIT=$(($WAIT - 1)) + sleep 1 + RES=`$NODETOOL ping` + if [ "$?" -ne 0 ]; then + continue + fi + NODEWATCHER=`$NODETOOL rpcterms erlang whereis "'riak_core_node_watcher'."` + if [ "$?" -eq 0 ] && [ "$NODEWATCHER" != "undefined" ]; then + exit 0 + fi + done + echo "Riak failed to start within ${WAIT_FOR_ERLANG:-15} seconds." + echo "If you want to wait longer, set the environment variable" + echo "WAIT_FOR_ERLANG to the number of seconds to wait." + exit 1 ;; stop)