From 0889bf55e29cd98ac0008f372ca7daf9b97a2745 Mon Sep 17 00:00:00 2001 From: JF Denise Date: Wed, 11 Sep 2019 12:09:26 +0200 Subject: [PATCH] probes 3, relies on status --- os-eap-probes/3.0/added/livenessProbe.sh | 38 +++ os-eap-probes/3.0/added/probe_common.sh | 43 ++++ .../3.0/added/probes/probe/__init__.py | 0 os-eap-probes/3.0/added/probes/probe/api.py | 184 ++++++++++++++ os-eap-probes/3.0/added/probes/probe/dmr.py | 121 +++++++++ .../3.0/added/probes/probe/eap/__init__.py | 0 .../3.0/added/probes/probe/eap/dmr.py | 234 ++++++++++++++++++ .../3.0/added/probes/probe/eap/jolokia.py | 189 ++++++++++++++ .../3.0/added/probes/probe/jolokia.py | 104 ++++++++ os-eap-probes/3.0/added/probes/runner.py | 109 ++++++++ os-eap-probes/3.0/added/readinessProbe.sh | 30 +++ os-eap-probes/3.0/configure.sh | 15 ++ os-eap-probes/3.0/module.yaml | 14 ++ 13 files changed, 1081 insertions(+) create mode 100644 os-eap-probes/3.0/added/livenessProbe.sh create mode 100644 os-eap-probes/3.0/added/probe_common.sh create mode 100644 os-eap-probes/3.0/added/probes/probe/__init__.py create mode 100644 os-eap-probes/3.0/added/probes/probe/api.py create mode 100644 os-eap-probes/3.0/added/probes/probe/dmr.py create mode 100644 os-eap-probes/3.0/added/probes/probe/eap/__init__.py create mode 100644 os-eap-probes/3.0/added/probes/probe/eap/dmr.py create mode 100644 os-eap-probes/3.0/added/probes/probe/eap/jolokia.py create mode 100644 os-eap-probes/3.0/added/probes/probe/jolokia.py create mode 100644 os-eap-probes/3.0/added/probes/runner.py create mode 100644 os-eap-probes/3.0/added/readinessProbe.sh create mode 100644 os-eap-probes/3.0/configure.sh create mode 100644 os-eap-probes/3.0/module.yaml diff --git a/os-eap-probes/3.0/added/livenessProbe.sh b/os-eap-probes/3.0/added/livenessProbe.sh new file mode 100644 index 00000000..33c4f410 --- /dev/null +++ b/os-eap-probes/3.0/added/livenessProbe.sh @@ -0,0 +1,38 @@ +#!/bin/sh + +. "$JBOSS_HOME/bin/probe_common.sh" + +if [ true = "${DEBUG}" ] ; then + # short circuit liveness check in dev mode + exit 0 +fi + +OUTPUT=/tmp/liveness-output +ERROR=/tmp/liveness-error +LOG=/tmp/liveness-log + +DEBUG_SCRIPT=false +PROBE_IMPL="probe.eap.dmr.EapProbe probe.eap.dmr.HealthCheckProbe" + +if [ $# -gt 0 ] ; then + DEBUG_SCRIPT=$1 +fi + +if [ $# -gt 1 ] ; then + PROBE_IMPL=$2 +fi + +if [ "$DEBUG_SCRIPT" = "true" ]; then + DEBUG_OPTIONS="--debug --logfile $LOG --loglevel DEBUG" +fi + +if python $JBOSS_HOME/bin/probes/runner.py -c READY -c NOT_READY $DEBUG_OPTIONS $PROBE_IMPL; then + exit 0 +fi + +if [ "$DEBUG_SCRIPT" == "true" ]; then + jps -v | grep standalone | awk '{print $1}' | xargs kill -3 +fi + +exit 1 + diff --git a/os-eap-probes/3.0/added/probe_common.sh b/os-eap-probes/3.0/added/probe_common.sh new file mode 100644 index 00000000..883d5a84 --- /dev/null +++ b/os-eap-probes/3.0/added/probe_common.sh @@ -0,0 +1,43 @@ +#!/bin/sh +# common routines for readiness and liveness probes + +# jboss-cli.sh sometimes hangs indefinitely. Send SIGTERM after CLI_TIMEOUT has passed +# and failing that SIGKILL after CLI_KILLTIME, to ensure that it exits +CLI_TIMEOUT=10s +CLI_KILLTIME=30s + +EAP_7_WARNING="Warning! The CLI is running in a non-modular environment and cannot load commands from management extensions." 
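+# run_cli_cmd (below) wraps the JBoss CLI client with the timeouts above and
+# strips the non-modular warning; see is_eap7 and deployments_failed for usage,
+# e.g. run_cli_cmd "version" or run_cli_cmd "deployment-info".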
+ +run_cli_cmd() { + cmd="$1" + + #Default for EAP7 + cli_port=9990 + + if [ -f "$JBOSS_HOME/bin/run.sh" ]; then + version=$($JBOSS_HOME/bin/run.sh -V) + if [[ "$version" == *"JBoss Enterprise Application Platform 6"* ]]; then + cli_port=9999 + fi + fi + + if [ -n "${PORT_OFFSET}" ]; then + cli_port=$(($cli_port+$PORT_OFFSET)) + fi + + timeout --foreground -k "$CLI_KILLTIME" "$CLI_TIMEOUT" java -jar $JBOSS_HOME/bin/client/jboss-cli-client.jar --connect --controller=localhost:${cli_port} "$cmd" | grep -v "$EAP_7_WARNING" +} + +is_eap7() { + run_cli_cmd "version" | grep -q "^JBoss AS product: JBoss EAP 7" +} + +# Additional check necessary for EAP7, see CLOUD-615 +deployments_failed() { + ls -- /deployments/*failed >/dev/null 2>&1 || (is_eap7 && run_cli_cmd "deployment-info" | grep -q FAILED) +} + +list_failed_deployments() { + ls -- /deployments/*failed >/dev/null 2>&1 && \ + echo /deployments/*.failed | sed "s+^/deployments/\(.*\)\.failed$+\1+" +} diff --git a/os-eap-probes/3.0/added/probes/probe/__init__.py b/os-eap-probes/3.0/added/probes/probe/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/os-eap-probes/3.0/added/probes/probe/api.py b/os-eap-probes/3.0/added/probes/probe/api.py new file mode 100644 index 00000000..5fcaacc5 --- /dev/null +++ b/os-eap-probes/3.0/added/probes/probe/api.py @@ -0,0 +1,184 @@ +""" +Copyright 2017 Red Hat, Inc. + +Red Hat licenses this file to you under the Apache License, version +2.0 (the "License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +implied. See the License for the specific language governing +permissions and limitations under the License. +""" + +import json +import logging +import sys + +from enum import Enum + +def qualifiedClassName(obj): + """ + Utility method for returning the fully qualified class name of an instance. + Objects must be instances of "new classes." + """ + + return obj.__module__ + "." + type(obj).__name__ + +class Status(Enum): + """ + Represents the outcome of a test. + HARD_FAILURE: An unrecoverable failure, causing an immediate failure of + the probes, i.e. no extra tries to see if the probe will pass. + FAILURE: A test failed, but may succeed on a subsequent execution + NOT_READY: The functionality being tested is not in a failed state, but + is also not ready, e.g. it may still be starting up, rebalancing, etc. + READY: The functionality being tested is ready to handle requests. + """ + + HARD_FAILURE = 1 + FAILURE = 2 + NOT_READY = 4 + READY = 8 + + def __str__(self): + return self.name + + def __cmp__(self, other): + if type(other) is self.__class__: + return self.value - other.value + return NotImplemented + + def __le__(self, other): + if type(other) is self.__class__: + return self.value <= other.value + return NotImplemented + + def __lt__(self, other): + if type(other) is self.__class__: + return self.value < other.value + return NotImplemented + + def __ge__(self, other): + if type(other) is self.__class__: + return self.value >= other.value + return NotImplemented + + def __gt__(self, other): + if type(other) is self.__class__: + return self.value > other.value + return NotImplemented + +class Test(object): + """ + An object which provides a query and evaluates the response. 
A Probe may + consist of many tests, which determine the liveness or readiness of the + server. + """ + + def __init__(self, query): + self.query = query + + def getQuery(self): + """ + Returns the query used by this test. The return value is Probe + specific. Many Probe implementations use JSON for submitting queries, + which means this function would return a dict. + """ + return self.query + + def evaluate(self, results): + """ + Evaluate the response from the server, returning Status and messages. + messages should be returned as an object, list or dict. + """ + raise NotImplementedError("Implement evaluate() for Test: " + qualifiedClassName(self)) + +class Probe(object): + """ + Runs a series of tests against a server to determine its readiness or + liveness. + """ + + def __init__(self, tests = []): + self.tests = tests + + def addTest(self, test): + """ + Adds a test to this Probe. The Test must provide a query that is + compatible with the Probe implementation (e.g. a DMR request formatted + as JSON). The Test must be capable of understanding the results + returned by the Probe (e.g. a JSON response from DMR). + """ + + self.tests.append(test) + + def execute(self): + """ + Executes the queries and evaluates the tests and returns a set of Status + and messages collected for each test. + """ + + raise NotImplementedError("Implement execute() for Probe: " + qualifiedClassName(self)) + +class BatchingProbe(Probe): + """ + Base class which supports batching queries to be sent to a server and + splitting the results to correspond with the individual tests. + """ + + def __init__(self, tests = []): + super(BatchingProbe, self).__init__(tests) + self.logger = logging.getLogger(qualifiedClassName(self)) + + def execute(self): + self.logger.info("Executing the following tests: [%s]", ", ".join(qualifiedClassName(test) for test in self.tests)) + request = self.createRequest() + + try: + results = self.sendRequest(request) + status = set() + output = {} + for index, test in enumerate(self.tests): + self.logger.info("Executing test %s", qualifiedClassName(test)) + try: + testResults = self.getTestInput(results, index) + if self.logger.isEnabledFor(logging.DEBUG): + self.logger.debug("Test input = %s", json.dumps(testResults, indent=4, separators=(',', ': '))) + (state, messages) = test.evaluate(testResults) + self.logger.info("Test %s returned status %s", qualifiedClassName(test), str(state)) + status.add(state) + output[qualifiedClassName(test)] = messages + except: + self.logger.exception("Unexpected failure running test %s", qualifiedClassName(test)) + status.add(Status.FAILURE) + output[qualifiedClassName(test)] = "Exception executing test: %s" % (sys.exc_info()[1]) + return (status, output) + except: + self.logger.exception("Unexpected failure sending probe request") + return (set([Status.FAILURE]), "Error sending probe request: %s" % (sys.exc_info()[1])) + + def createRequest(self): + """ + Create the request to send to the server. Subclasses should include the + queries from all tests in the request. + """ + + raise NotImplementedError("Implement createRequest() for BatchingProbe: " + qualifiedClassName(self)) + + def sendRequest(self, request): + """ + Send the request to the server. + """ + + raise NotImplementedError("Implement sendRequest() for BatchingProbe: " + qualifiedClassName(self)) + + def getTestInput(self, results, testIndex): + """ + Return the results specific to the indexed test. 
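+        For example, DmrProbe returns the composite step result at the same
+        index as the test, while JolokiaProbe returns results[testIndex].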
+ """ + + raise NotImplementedError("Implement getTestInput() for BatchingProbe: " + qualifiedClassName(self)) diff --git a/os-eap-probes/3.0/added/probes/probe/dmr.py b/os-eap-probes/3.0/added/probes/probe/dmr.py new file mode 100644 index 00000000..63250ed3 --- /dev/null +++ b/os-eap-probes/3.0/added/probes/probe/dmr.py @@ -0,0 +1,121 @@ +""" +Copyright 2017 Red Hat, Inc. + +Red Hat licenses this file to you under the Apache License, version +2.0 (the "License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +implied. See the License for the specific language governing +permissions and limitations under the License. +""" + +import json +import logging +import os +import requests +import sys + +from collections import OrderedDict + +from probe.api import qualifiedClassName, BatchingProbe, Status, Test + +class DmrProbe(BatchingProbe): + """ + A Probe implementation that sends a batch of queries to a server using EAP's + management interface. Tests should provide JSON queries specific to EAP's + management interface and should be able to handle DMR results. + """ + + def __init__(self, tests = []): + super(DmrProbe, self).__init__(tests) + self.logger = logging.getLogger(qualifiedClassName(self)) + self.__readConfig() + + def __readConfig(self): + """ + Configuration consists of: + host: localhost + port: 9990 + $PORT_OFFSET + user: $ADMIN_USERNAME + password: $ADMIN_PASSWORD + """ + + self.host = "localhost" + self.port = 9990 + int(os.getenv('PORT_OFFSET', 0)) + self.user = os.getenv('ADMIN_USERNAME') + self.password = os.getenv('ADMIN_PASSWORD') + if self.password != "": + if self.user is None or self.user == "": + self.user = os.getenv('DEFAULT_ADMIN_USERNAME') + self.logger.debug("Configuration set as follows: host=%s, port=%s, user=%s, password=***", self.host, self.port, self.user) + + def getTestInput(self, results, testIndex): + return list(results["result"].values())[testIndex] + + def createRequest(self): + steps = [] + for test in self.tests: + steps.append(test.getQuery()) + return { + "operation": "composite", + "address": [], + "json.pretty": 1, + "steps": steps + } + + def sendRequest(self, request): + url = "http://%s:%s/management" % (self.host, self.port) + self.logger.info("Sending probe request to %s", url) + if self.logger.isEnabledFor(logging.DEBUG): + self.logger.debug("Probe request = %s", json.dumps(request, indent=4, separators=(',', ': '))) + response = requests.post( + url, + json = request, + headers = { + "Accept": "text/plain" + }, + proxies = { + "http": None, + "https": None + }, + auth = requests.auth.HTTPDigestAuth(self.user, self.password) if self.user else None, + verify = False + ) + self.logger.debug("Probe response: %s", response) + + if response.status_code != 200: + """ + See if this non-200 represents an unusable response, or just a failure + response because one of the test steps failed, in which case we pass the + response to the tests to let them decide how to handle things + """ + self.failUnusableResponse(response, request, url) + + return response.json(object_pairs_hook = OrderedDict) + + def failUnusableResponse(self, response, request, url): + respDict = None + try: + respDict = response.json(object_pairs_hook = OrderedDict) + except 
ValueError: + self.logger.debug("Probe request failed with no parseable json response") + + unusable = not respDict or not respDict["outcome"] or respDict["outcome"] != "failed" or not respDict["result"] + if not unusable: + """ + An outcome=failed response is usable if the result node has an element for each test + """ + stepResults = list(respDict["result"].values()) + for index, test in enumerate(self.tests): + if not stepResults[index]: + unusable = True + break; + + if unusable: + self.logger.error("Probe request failed. Status code: %s", response.status_code) + raise Exception("Probe request failed, code: " + str(response.status_code) + str(url) + str(request) + str(response.json(object_pairs_hook = OrderedDict))) diff --git a/os-eap-probes/3.0/added/probes/probe/eap/__init__.py b/os-eap-probes/3.0/added/probes/probe/eap/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/os-eap-probes/3.0/added/probes/probe/eap/dmr.py b/os-eap-probes/3.0/added/probes/probe/eap/dmr.py new file mode 100644 index 00000000..67defce4 --- /dev/null +++ b/os-eap-probes/3.0/added/probes/probe/eap/dmr.py @@ -0,0 +1,234 @@ +""" +Copyright 2017 Red Hat, Inc. + +Red Hat licenses this file to you under the Apache License, version +2.0 (the "License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +implied. See the License for the specific language governing +permissions and limitations under the License. +""" + +import os +import re + +from probe.api import Status, Test +from probe.dmr import DmrProbe + +class EapProbe(DmrProbe): + """ + Basic EAP probe which uses the DMR interface to query server state. It + defines tests for server status, boot errors and deployment status. + """ + + def __init__(self): + super(EapProbe, self).__init__( + [ + ServerStatusTest(), + BootErrorsTest(), + DeploymentTest() + ] + ) + +class ServerStatusTest(Test): + """ + Checks the status of the server. + """ + + def __init__(self): + super(ServerStatusTest, self).__init__( + { + "operation": "read-attribute", + "name": "server-state" + } + ) + + def evaluate(self, results): + """ + Evaluates the test: + READY for "running" + FAILURE if the query itself failed + NOT_READY for all other states + """ + + if results["outcome"] != "success" and results.get("failure-description"): + return (Status.FAILURE, "DMR query failed") + if results["result"] == "running": + return (Status.READY, results["result"]) + return (Status.NOT_READY, results["result"]) + +class BootErrorsTest(Test): + """ + Checks the server for boot errors. 
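+    The check can be disabled by setting PROBE_DISABLE_BOOT_ERRORS_CHECK=true.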
+ """ + + def __init__(self): + super(BootErrorsTest, self).__init__( + { + "operation": "read-boot-errors", + "address": { + "core-service": "management" + } + } + ) + self.__disableBootErrorsCheck = os.getenv("PROBE_DISABLE_BOOT_ERRORS_CHECK", "false").lower() == "true" + + def evaluate(self, results): + """ + Evaluates the test: + READY if no boot errors were returned + HARD_FAILURE if any boot errors were returned + FAILURE if the query itself failed + """ + + if self.__disableBootErrorsCheck: + return (Status.READY, "Boot errors check is disabled") + + if results["outcome"] != "success" and results.get("failure-description"): + return (Status.FAILURE, "DMR query failed") + + if results.get("result"): + errors = [] + errors.extend(results["result"]) + return (Status.HARD_FAILURE, errors) + + return (Status.READY, "No boot errors") + +class DeploymentTest(Test): + """ + Checks the state of the deployments. + """ + + def __init__(self): + super(DeploymentTest, self).__init__( + { + "operation": "read-attribute", + "address": { + "deployment": "*" + }, + "name": "status" + } + ) + + def evaluate(self, results): + """ + Evaluates the test: + READY if all deployments are OK + HARD_FAILURE if any deployments FAILED + FAILURE if the query failed or if any deployments are not OK, but not FAILED + """ + + if results["outcome"] != "success" and results.get("failure-description"): + return (Status.FAILURE, "DMR query failed") + + if not results["result"]: + return (Status.READY, "No deployments") + + status = set() + messages = {} + for result in results["result"]: + if result["outcome"] != "success" and result.get("failure-description"): + status.add(Status.FAILURE) + messages[result["address"][0]["deployment"]] = "DMR query failed" + else: + deploymentStatus = result["result"] + messages[result["address"][0]["deployment"]] = deploymentStatus + if deploymentStatus == "FAILED": + status.add(Status.HARD_FAILURE) + elif deploymentStatus == "OK": + status.add(Status.READY) + else: + status.add(Status.FAILURE) + + return (min(status), messages) + +class HealthCheckProbe(DmrProbe): + """ + Basic EAP probe which uses the DMR interface to query server state. It + defines tests for server status, boot errors and deployment status. + """ + + def __init__(self): + super(HealthCheckProbe, self).__init__( + [ + HealthCheckTest() + ] + ) + +class HealthCheckTest(Test): + """ + Checks the state of the Health Check subsystem, if installed. + We use a composite with a first step that does a simple read-resource + and a second step that reads the health check status. + A failure in the first step means the subsystem is not present and any + failure in the second step should be ignored as meaningless. 
+ """ + + def __init__(self): + super(HealthCheckTest, self).__init__( + { + "operation": "composite", + "address": [], + "steps": [ + { + "operation": "read-resource", + "address": { + "subsystem": "microprofile-health-smallrye" + }, + "recursive" : False + }, + { + "operation": "check", + "address": { + "subsystem": "microprofile-health-smallrye" + } + } + ] + } + ) + + def evaluate(self, results): + """ + Evaluates the test: + if the overall composite failed with JBAS014883 or WFLYCTL0030 + READY as the failure means no health check extension configured on the system + elsif the 'read-resource' step failed: + READY as failure means no health check subsystem configured on the system + elsif the 'check' step succeeded: + READY if the 'check' step result's outcome field is 'UP' + HARD_FAILURE otherwise + else: + HARD_FAILURE as the query failed + + In no case do we return NOT_READY as MicroProfile Health Check is not a readiness check. + """ + + if results.get("failure-description") and re.compile("JBAS014883|WFLYCTL0030").search(str(results.get("failure-description"))): + return (Status.READY, "Health Check not configured") + + if not results.get("result") or not results["result"].get("step-1"): + return (Status.FAILURE, "DMR operation failed") + + if results["result"]["step-1"].get("outcome") != "success" and results["result"]["step-1"].get("failure-description"): + return (Status.READY, "Health Check not configured") + + if not results["result"].get("step-2"): + return (Status.HARD_FAILURE, "DMR operation failed " + str(results)) + + if results["result"]["step-2"].get("outcome") != "success" or not results["result"]["step-2"].get("result"): + return (Status.HARD_FAILURE, "DMR health check step failed " + str(results["result"]["step-2"])) + + status = results["result"]["step-2"]["result"].get("status") + if status == "UP": + return (Status.READY, "Status is UP") + + if status == "DOWN": + return (Status.HARD_FAILURE, "Status is DOWN") + + return (Status.HARD_FAILURE, "DMR health check step failed " + str(results["result"]["step-2"]["result"])) + diff --git a/os-eap-probes/3.0/added/probes/probe/eap/jolokia.py b/os-eap-probes/3.0/added/probes/probe/eap/jolokia.py new file mode 100644 index 00000000..3a5da9e2 --- /dev/null +++ b/os-eap-probes/3.0/added/probes/probe/eap/jolokia.py @@ -0,0 +1,189 @@ +""" +Copyright 2017 Red Hat, Inc. + +Red Hat licenses this file to you under the Apache License, version +2.0 (the "License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +implied. See the License for the specific language governing +permissions and limitations under the License. +""" + +import os +import re + +from probe.api import Status, Test +from probe.jolokia import JolokiaProbe + +class EapProbe(JolokiaProbe): + """ + Basic EAP probe which uses the Jolokia interface to query server state (i.e. + RESTful JMX queries). It defines tests for server status, boot errors and + deployment status. + """ + + def __init__(self): + super(EapProbe, self).__init__( + [ + ServerStatusTest(), + BootErrorsTest(), + DeploymentTest(), + HealthCheckTest() + ] + ) + +class ServerStatusTest(Test): + """ + Checks the status of the server. 
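+    It reads the serverState attribute of the jboss.as:management-root=server
+    MBean via Jolokia.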
+ """ + + def __init__(self): + super(ServerStatusTest, self).__init__( + { + "type": "read", + "attribute": "serverState", + "mbean": "jboss.as:management-root=server" + } + ) + + def evaluate(self, results): + """ + Evaluates the test: + READY for "running" + FAILURE if the query itself failed + NOT_READY for all other states + """ + + if results["status"] != 200: + return (Status.FAILURE, "Jolokia query failed") + if results["value"] == "running": + return (Status.READY, results["value"]) + return (Status.NOT_READY, results["value"]) + +class BootErrorsTest(Test): + """ + Checks the server for boot errors. + """ + + def __init__(self): + super(BootErrorsTest, self).__init__( + { + "type": "exec", + "operation": "readBootErrors", + "mbean": "jboss.as:core-service=management" + } + ) + self.__disableBootErrorsCheck = os.getenv("PROBE_DISABLE_BOOT_ERRORS_CHECK", "false").lower() == "true" + + def evaluate(self, results): + """ + Evaluates the test: + READY if no boot errors were returned + HARD_FAILURE if any boot errors were returned + FAILURE if the query itself failed + """ + + if self.__disableBootErrorsCheck: + return (Status.READY, "Boot errors check is disabled") + + if results["status"] != 200: + return (Status.FAILURE, "Jolokia query failed") + + if results["value"]: + errors = [] + errors.extend(results["value"]) + return (Status.HARD_FAILURE, errors) + + return (Status.READY, "No boot errors") + +class DeploymentTest(Test): + """ + Checks the state of the deployments. + """ + + def __init__(self): + super(DeploymentTest, self).__init__( + { + "type": "read", + "attribute": "status", + "mbean": "jboss.as:deployment=*" + } + ) + + def evaluate(self, results): + """ + Evaluates the test: + READY for a 404 due to InstanceNotFoundException as that means no deployments configured on the system + READY if all deployments are OK + HARD_FAILURE if any deployments FAILED + FAILURE if the query failed or if any deployments are not OK, but not FAILED + """ + + if results["status"] == 404 and results.get("error_type") and re.compile(".*InstanceNotFoundException.*").match(results.get("error_type")): + return (Status.READY, "No deployments") + + if results["status"] != 200: + return (Status.FAILURE, "Jolokia query failed") + + if not results["value"]: + return (Status.READY, "No deployments") + + status = set() + messages = {} + for key, value in results["value"].items(): + deploymentStatus = value["status"] + messages[key.rsplit("=",1)[1]] = deploymentStatus + if deploymentStatus == "FAILED": + status.add(Status.HARD_FAILURE) + elif deploymentStatus == "OK": + status.add(Status.READY) + else: + status.add(Status.FAILURE) + return (min(status), messages) + +class HealthCheckTest(Test): + """ + Checks the state of the Health Check subsystem, if installed. + """ + + def __init__(self): + super(HealthCheckTest, self).__init__( + { + "type": "exec", + "operation": "check", + "mbean": "jboss.as:subsystem=microprofile-health-smallrye" + } + ) + + def evaluate(self, results): + """ + Evaluates the test: + READY for a 404 due to InstanceNotFoundException as that means no health check configured on the system + HARD_FAILURE for any other non-200 as the query failed + READY if the result value's outcome field is 'UP' + HARD_FAILURE otherwise + + In no case do we return NOT_READY as MicroProfile Health Check is not a readiness check. 
+ """ + + if results["status"] == 404 and results.get("error_type") and re.compile(".*InstanceNotFoundException.*").match(results.get("error_type")): + return (Status.READY, "Health Check not configured") + + if results["status"] != 200 or not results.get("value"): + return (Status.HARD_FAILURE, "Jolokia query failed " + str(results)) + + outcome = results["value"].get("outcome") + + if not outcome: + return (Status.HARD_FAILURE, "No outcome") + + if re.compile("\W*UP\W*").match(outcome): + return (Status.READY, "Status is UP") + + return (Status.HARD_FAILURE, outcome) + diff --git a/os-eap-probes/3.0/added/probes/probe/jolokia.py b/os-eap-probes/3.0/added/probes/probe/jolokia.py new file mode 100644 index 00000000..03b39944 --- /dev/null +++ b/os-eap-probes/3.0/added/probes/probe/jolokia.py @@ -0,0 +1,104 @@ +""" +Copyright 2017 Red Hat, Inc. + +Red Hat licenses this file to you under the Apache License, version +2.0 (the "License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +implied. See the License for the specific language governing +permissions and limitations under the License. +""" + +import json +import logging +import os +import requests +import sys +import configparser as ConfigParser +from io import StringIO + +from collections import OrderedDict + +from probe.api import qualifiedClassName, BatchingProbe, Status, Test + +class JolokiaProbe(BatchingProbe): + """ + A Probe implementation that sends a batch of queries to a server using + Jolokia's REST API. Tests should provide JSON queries specific to Jolokia + and should be able to handle Jolokia formatted results. 
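+    Connection settings (port, protocol and credentials) are read from
+    /opt/jolokia/etc/jolokia.properties.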
+ """ + + def __init__(self, tests = []): + super(JolokiaProbe, self).__init__(tests) + self.logger = logging.getLogger(qualifiedClassName(self)) + self.__readConfig() + + def __readConfig(self): + """ + Configuration is read from /opt/jolokia/etc/jolokia.properties and + consists of: + host: localhost + port: jolokia.port + $PORT_OFFSET + protocol: jolokia.protocol + user: jolokia.user + password: jolokia.password + """ + + jolokiaConfig = ConfigParser.ConfigParser( + defaults = { + "port": 8778, + "user": None, + "password": None, + "protocol": "http" + } + ) + + self.logger.info("Reading jolokia properties file") + with open("/opt/jolokia/etc/jolokia.properties") as jolokiaProperties: + # fake a section + jolokiaConfig.readfp(StringIO("[jolokia]\n" + jolokiaProperties.read())) + + self.host = "localhost" + self.port = int(jolokiaConfig.get("jolokia", "port")) + int(os.getenv('PORT_OFFSET', 0)) + self.protocol = jolokiaConfig.get("jolokia", "protocol") + self.user = jolokiaConfig.get("jolokia", "user") + self.password = jolokiaConfig.get("jolokia", "password") + + self.logger.debug("Configuration set as follows: host=%s, port=%s, protocol=%s, user=%s, password=***", self.host, self.port, self.protocol, self.user) + + def getTestInput(self, results, testIndex): + return results[testIndex] + + def createRequest(self): + request = [] + for test in self.tests: + request.append(test.getQuery()) + return request + + def sendRequest(self, request): + url = "%s://%s:%s/jolokia/" % (self.protocol, self.host, self.port) + self.logger.info("Sending probe request to %s", url) + if self.logger.isEnabledFor(logging.DEBUG): + self.logger.debug("Probe request = %s", json.dumps(request, indent=4, separators=(',', ': '))) + response = requests.post( + url, + json = request, + proxies = { + "http": None, + "https": None + }, + auth = requests.auth.HTTPBasicAuth(self.user, self.password) if self.user else None, + verify = False + ) + self.logger.debug("Probe response: %s", response) + + if response.status_code != 200: + self.logger.error("Probe request failed. Status code: %s", response.status_code) + raise Exception("Probe request failed, code: " + str(response.status_code)) + + return response.json(object_pairs_hook = OrderedDict) diff --git a/os-eap-probes/3.0/added/probes/runner.py b/os-eap-probes/3.0/added/probes/runner.py new file mode 100644 index 00000000..c39f57a9 --- /dev/null +++ b/os-eap-probes/3.0/added/probes/runner.py @@ -0,0 +1,109 @@ +""" +Copyright 2017 Red Hat, Inc. + +Red Hat licenses this file to you under the Apache License, version +2.0 (the "License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +implied. See the License for the specific language governing +permissions and limitations under the License. +""" + +import argparse +import importlib +import json +import logging +import time + +from probe.api import qualifiedClassName, Status + +class ProbeRunner(object): + """ + Simply executes a series of Probes, returning the combined Status and + messages. 
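+    Probes are named on the command line as module.ClassName and are loaded
+    via importlib by the __main__ block below.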
+ """ + + def __init__(self, probes = []): + self.probes = probes + self.logger = logging.getLogger(qualifiedClassName(self)) + + def addProbe(self, probe): + self.probes.append(probe) + + def executeProbes(self): + self.logger.info("Running the following probes: [%s]", ", ".join(qualifiedClassName(probe) for probe in self.probes)) + results = set() + output = {} + for probe in self.probes: + self.logger.info("Running probe: %s", qualifiedClassName(probe)) + (statuses, messages) = probe.execute() + self.logger.info("Probe %s returned statuses [%s]", qualifiedClassName(probe), ", ".join(str(status) for status in statuses)) + if self.logger.isEnabledFor(logging.DEBUG): + self.logger.debug("Probe %s returned messages %s", qualifiedClassName(probe), json.dumps(messages, indent=4, separators=(',', ': '))) + results |= statuses + output[qualifiedClassName(probe)] = messages + return (results, output) + +def toStatus(value): + """ + Helper method which converts a string to a Status. Used by the + ArgumentParser. + """ + + return Status[value] + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description = "Executes the specified probes returning cleanly if probe status matches desired status") + parser.add_argument("-c", "--check", required = True, type = toStatus, action = "append", help = "The acceptable probe statuses, may be: READY, NOT_READY.") + parser.add_argument("-d", "--debug", action = "store_true", help = "Enable debugging") + parser.add_argument("--logfile", help = "Log file.") + parser.add_argument("--loglevel", default = "CRITICAL", choices = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], help = "Log level.") + parser.add_argument("probes", nargs = argparse.REMAINDER, help = "The probes to execute.") + + args = parser.parse_args() + + # don't spam warnings (e.g. when not verifying ssl connections) + logging.captureWarnings(True) + + if args.logfile: + logging.basicConfig(filename = args.logfile, format = '%(asctime)s %(levelname)s [%(name)s] %(message)s', level = args.loglevel.upper()) + else: + logging.basicConfig(level = args.loglevel.upper()) + + logger = logging.getLogger(__name__) + + logger.debug("Starting probe runner with args: %s", args) + + runner = ProbeRunner() + for probe in args.probes: + logger.info("Loading probe: %s", probe) + probeModule = importlib.import_module(probe.rsplit(".", 1)[0]) + probeClass = getattr(probeModule, probe.rsplit(".", 1)[1]) + runner.addProbe(probeClass()) + + okStatus = set(args.check) + + logger.info("Probes will fail for the following states: [%s]", ", ".join(str(status) for status in set(Status) - okStatus)) + + probeStatus = set() + output = {} + logger.info("Running probes") + (probeStatus, output) = runner.executeProbes() + if okStatus >= probeStatus: + logger.info("Probes succeeded") + if args.debug: + print(json.dumps(output, indent=4, separators=(',', ': '))) + exit(0) + if Status.HARD_FAILURE in probeStatus: + logger.error("Probes detected HARD_FAILURE.") + + # we didn't succeed + logger.error("Probe failure. Probes did not succeed.") + # print so the output is available to users in the OpenShift event log + print(json.dumps(output, indent=4, separators=(',', ': '))) + exit(1) diff --git a/os-eap-probes/3.0/added/readinessProbe.sh b/os-eap-probes/3.0/added/readinessProbe.sh new file mode 100644 index 00000000..2bb7b8ea --- /dev/null +++ b/os-eap-probes/3.0/added/readinessProbe.sh @@ -0,0 +1,30 @@ +#!/bin/sh + +. 
"$JBOSS_HOME/bin/probe_common.sh" + +OUTPUT=/tmp/readiness-output +ERROR=/tmp/readiness-error +LOG=/tmp/readiness-log + +COUNT=30 +SLEEP=5 +DEBUG=${SCRIPT_DEBUG:-false} +PROBE_IMPL="probe.eap.dmr.EapProbe probe.eap.dmr.HealthCheckProbe" + +if [ $# -gt 0 ] ; then + DEBUG=$1 +fi + +if [ $# -gt 1 ] ; then + PROBE_IMPL=$2 +fi + +if [ "$DEBUG" = "true" ]; then + DEBUG_OPTIONS="--debug --logfile $LOG --loglevel DEBUG" +fi + +if python $JBOSS_HOME/bin/probes/runner.py -c READY $DEBUG_OPTIONS $PROBE_IMPL; then + exit 0 +fi +exit 1 + diff --git a/os-eap-probes/3.0/configure.sh b/os-eap-probes/3.0/configure.sh new file mode 100644 index 00000000..1432ab08 --- /dev/null +++ b/os-eap-probes/3.0/configure.sh @@ -0,0 +1,15 @@ +#!/bin/sh +set -e + +SCRIPT_DIR=$(dirname $0) +ADDED_DIR=${SCRIPT_DIR}/added + +# Add liveness and readiness probes and helper library +cp -r "$ADDED_DIR"/* $JBOSS_HOME/bin/ + +chown -R jboss:root $JBOSS_HOME/bin/ +chmod -R g+rwX $JBOSS_HOME/bin/ + +# ensure added scripts are executable +chmod ug+x $JBOSS_HOME/bin/readinessProbe.sh $JBOSS_HOME/bin/livenessProbe.sh +chmod -R ug+x $JBOSS_HOME/bin/probes diff --git a/os-eap-probes/3.0/module.yaml b/os-eap-probes/3.0/module.yaml new file mode 100644 index 00000000..3d9fd056 --- /dev/null +++ b/os-eap-probes/3.0/module.yaml @@ -0,0 +1,14 @@ +schema_version: 1 +name: os-eap-probes +version: '3.0' +description: os-eap-probes script package for python 3 +execute: +- script: configure.sh + user: '185' +packages: + install: + - python3-requests +envs: + - name: "PROBE_DISABLE_BOOT_ERRORS_CHECK" + example: "true" + description: Disable the boot errors check in the probes.