Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CLOUD-3349] - openshift-migrate.sh terminates with Max retries exceeded with url: /management #135

Merged
Merged 2 commits on Aug 20, 2019
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
62 changes: 39 additions & 23 deletions os-eap-migration/added/launch/openshift-migrate-common.sh
@@ -7,7 +7,8 @@ source /opt/partition/partitionPV.sh

function runMigration() {
local instanceDir=$1

local COUNT=30
local SLEEP=5
# if count provided the node_name should be constructed
local count=$2
[ "x$count" != "x" ] && export NODE_NAME="${NODE_NAME:-node}-${count}"
@@ -30,31 +31,46 @@ function runMigration() {
trap "echo Received TERM ; touch \"${terminatingFile}\" ; kill -TERM $PID ; " TERM
local success=false
local message="Finished, migration pod has been terminated"
${JBOSS_HOME}/bin/readinessProbe.sh
local probeStatus=$?
local probeStatus=0

if [ $probeStatus -eq 0 ] ; then
echo "$(date): Server started, checking for transactions"

local startTime=$(date +'%s')
local endTime=$((startTime + ${RECOVERY_TIMEOUT} + 1))

local socketBinding=$(run_cli_cmd '/subsystem=transactions/:read-attribute(name="socket-binding")' | grep -w result | sed -e 's+^.*=> "++' -e 's+".*$++')
local recoveryPort=$(run_cli_cmd '/socket-binding-group=standard-sockets/socket-binding='"${socketBinding}"'/:read-attribute(name="bound-port")' | grep -w result | sed -e 's+^.*=> ++')
local recoveryHost=$(run_cli_cmd '/socket-binding-group=standard-sockets/socket-binding='"${socketBinding}"'/:read-attribute(name="bound-address")' | grep -w result | sed -e 's+^.*=> "++' -e 's+".*$++')

if [ "${recoveryPort}" != "undefined" ] ; then
local recoveryClass="com.arjuna.ats.arjuna.tools.RecoveryMonitor"
recoveryJar=$(find "${JBOSS_HOME}" -name \*.jar | xargs grep -l "${recoveryClass}")
if [ -n "${recoveryJar}" ] ; then
echo "$(date): Executing synchronous recovery scan for a first time"
java -cp "${recoveryJar}" "${recoveryClass}" -host "${recoveryHost}" -port "${recoveryPort}" -timeout 1800000
echo "$(date): Executing synchronous recovery scan for a second time"
java -cp "${recoveryJar}" "${recoveryClass}" -host "${recoveryHost}" -port "${recoveryPort}" -timeout 1800000
echo "$(date): Synchronous recovery scans finished for the first and the second time"
# this sleeps for 10s before the first probe, to emulate the old behavior
sleep 10

for i in `seq ${COUNT}`
do
echo "Checking readiness probe status for server start."
${JBOSS_HOME}/bin/readinessProbe.sh
probeStatus=$?
if [ $probeStatus -eq 0 ] ; then
echo "$(date): Server started, checking for transactions"

local startTime=$(date +'%s')
local endTime=$((startTime + ${RECOVERY_TIMEOUT} + 1))

local socketBinding=$(run_cli_cmd '/subsystem=transactions/:read-attribute(name="socket-binding")' | grep -w result | sed -e 's+^.*=> "++' -e 's+".*$++')
local recoveryPort=$(run_cli_cmd '/socket-binding-group=standard-sockets/socket-binding='"${socketBinding}"'/:read-attribute(name="bound-port")' | grep -w result | sed -e 's+^.*=> ++')
local recoveryHost=$(run_cli_cmd '/socket-binding-group=standard-sockets/socket-binding='"${socketBinding}"'/:read-attribute(name="bound-address")' | grep -w result | sed -e 's+^.*=> "++' -e 's+".*$++')

if [ "${recoveryPort}" != "undefined" ] ; then
local recoveryClass="com.arjuna.ats.arjuna.tools.RecoveryMonitor"
# we may have > 1 jar, if that is the case we use the most recent one
recoveryJars=$(find "${JBOSS_HOME}" -name \*.jar | xargs grep -l "${recoveryClass}")
recoveryJar=$(ls -Art $recoveryJars | tail -n 1)
if [ -n "${recoveryJar}" ] ; then
echo "$(date): Executing synchronous recovery scan for a first time"
java -cp "${recoveryJar}" "${recoveryClass}" -host "${recoveryHost}" -port "${recoveryPort}" -timeout 1800000
echo "$(date): Executing synchronous recovery scan for a second time"
java -cp "${recoveryJar}" "${recoveryClass}" -host "${recoveryHost}" -port "${recoveryPort}" -timeout 1800000
echo "$(date): Synchronous recovery scans finished for the first and the second time"
fi
fi
# probe was successful, exit loop
break
else
echo "Sleeping ${SLEEP} seconds before retrying readiness probe."
sleep ${SLEEP}
fi
fi
done

# -- checking if the pod log is clean from errors (only if function of the particular name exists, provided by the os-partition module)
if [ $probeStatus -eq 0 ] && [ "$(type -t probePodLogForRecoveryErrors)" = 'function' ]; then
Expand Down