This repository has been archived by the owner on Jan 30, 2024. It is now read-only.
/
check_json_healthcheck
executable file
·169 lines (124 loc) · 4.71 KB
/
check_json_healthcheck
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
#!/usr/bin/env python3
"""
Query an app's healthcheck endpoint.
Some of our apps are now following this convention for their healthcheck
information:
    {
      "status": "ok",
      "checks": {
        "sufficient_devops": {
          "status": "ok"
        },
        "whizzbangs_recently_frobnicated": {
          "status": "ok"
        }
      }
    }
This script can be invoked with the healthcheck port, path, host (optional) and
protocol (optional) as arguments:
check_json_healthcheck 3000 /healthcheck host_path https
"""
from contextlib import closing
import json
from socket import timeout
import sys
from urllib.request import urlopen, Request, HTTPError
from urllib.parse import urlunparse
# Nagios-style status names. Order matters: report() uses an index into this
# list both to look up the printed prefix and as the process exit code
# (so "ok" is 0 and "unknown" is 3).
STATUSES = ["ok", "warning", "critical", "unknown"]
def report(exit_code, message, detail_message=None):
    """Report the check's result to STDOUT, then exit.

    Args:
        exit_code: Index into STATUSES. It selects the upper-cased prefix of
            the summary line and is also used as the process exit status.
        message: Summary text printed after the prefix on the first line.
        detail_message: Optional extra text, printed after a blank line.

    Raises:
        SystemExit: Always, carrying ``exit_code``.
        IndexError: If ``exit_code`` is out of range for STATUSES.
    """
    prefix = STATUSES[exit_code].upper()
    print('%s: %s' % (prefix, message))
    if detail_message:
        print("\n" + detail_message)
    # Use sys.exit rather than the bare `exit` builtin: the latter is injected
    # by the `site` module for interactive use and is missing under `python -S`.
    sys.exit(exit_code)
def report_error(message):
    """Print ``message`` as a CRITICAL result and terminate the check."""
    critical_code = STATUSES.index("critical")
    report(critical_code, message)
def handle_exception(exc_type, exception, traceback):
    """Exception hook: surface an uncaught exception to Nagios as UNKNOWN.

    Accepts the three arguments Python passes to ``sys.excepthook``; only the
    exception instance is used, to build the reported message. The check then
    exits with the index of "unknown" in STATUSES (exit code 3).
    """
    unknown_code = STATUSES.index("unknown")
    summary = "unhandled exception: %s" % (exception,)
    report(unknown_code, summary)
def url_from_arguments(arguments):
    """Build the healthcheck URL described by the command-line arguments.

    Positional layout of ``arguments``:

    0. port (required; converted with ``int``)
    1. healthcheck path (required)
    2. host (optional; defaults to ``localhost``)
    3. protocol (optional; defaults to ``http``)
    """
    check_port = int(arguments[0])
    check_path = arguments[1]
    check_host = arguments[2] if len(arguments) > 2 else 'localhost'
    protocol = arguments[3] if len(arguments) > 3 else 'http'

    netloc = "%s:%d" % (check_host, check_port)
    # The trailing three components (params, query, fragment) are left unset.
    return urlunparse((protocol, netloc, check_path, None, None, None))
def json_request(request_url):
    """Wrap ``request_url`` in a Request that asks for a JSON response.

    ``urlopen`` would accept the bare URL, but that leaves no way to send the
    explicit Accept header needed to be sure the app being checked returns a
    suitable response.
    """
    accept_json = {"Accept": "application/json"}
    return Request(request_url, data=None, headers=accept_json)
class HealthCheckInfo:
    """Store and encapsulate a healthcheck data structure.

    See this module's docstring for an example of such a data structure.
    """

    def __init__(self, healthcheck_dict):
        """Wrap a decoded healthcheck response.

        Args:
            healthcheck_dict: The decoded JSON body; must be a dict with a
                top-level "status" key.

        Raises:
            ValueError: If the value is not a dict or has no "status" key.
        """
        # A bare `in` test also succeeds for a list or string that merely
        # contains "status", so insist on a JSON object before looking inside.
        if not isinstance(healthcheck_dict, dict):
            raise ValueError("Healthcheck response is not a JSON object")
        if "status" not in healthcheck_dict:
            raise ValueError("Missing overall status value")
        self._dict = healthcheck_dict

    @property
    def overall_status(self):
        """Return the top-level "status" value exactly as received."""
        return self._dict["status"]

    @property
    def overall_status_code(self):
        """Return the index of the overall status within STATUSES.

        Raises:
            ValueError: If the overall status is not one of STATUSES.
        """
        try:
            return STATUSES.index(self.overall_status)
        except ValueError:
            # `from None`: the inner "x is not in list" error adds nothing.
            raise ValueError("unrecognised overall status: '%s'" %
                             (self.overall_status,)) from None

    @property
    def check_statuses(self):
        """Return a list of strings summarising the status of each check.

        For example:

            ["sufficient_whizbangs - ok", "recently_dusted - warning"]

        A missing or null "checks" entry yields an empty list.
        """
        checks = self._dict.get("checks") or {}
        return [" - ".join(self._check_parts(check_name, check_value))
                for (check_name, check_value) in checks.items()]

    def _check_parts(self, check_name, check_value):
        """Return [name, status] plus the message, if any, all as strings."""
        parts = [check_name, check_value["status"]]
        message = check_value.get("message")
        if message is not None:
            parts.append(message)
        # JSON values are not necessarily strings (numbers, booleans, ...);
        # stringify so the caller's str.join cannot raise TypeError.
        return [str(part) for part in parts]
if __name__ == "__main__":
    # Anything not handled explicitly below is reported via handle_exception
    # rather than as a raw traceback.
    sys.excepthook = handle_exception

    # Any exceptions from this will fall through to the exception hook above
    check_url = url_from_arguments(sys.argv[1:])

    try:
        with closing(urlopen(json_request(check_url), timeout=20)) as response:
            healthcheck_info = HealthCheckInfo(json.loads(response.read().decode('UTF-8')))
    except HTTPError as e:
        report_error("healthcheck returned HTTP error %d" % (e.code,))
    except Exception as e:
        # Connection failures, timeouts, undecodable or invalid responses.
        # `Exception` rather than a bare `except:` so that KeyboardInterrupt
        # and SystemExit are not reported as a CRITICAL healthcheck result.
        report_error(e)

    # report_error() always exits, so healthcheck_info is bound from here on.
    if healthcheck_info.check_statuses:
        # Each status formatted as, for example, "foo_working - ok"
        detail = "\n".join(["Checks:"] + healthcheck_info.check_statuses)
    else:
        detail = None

    report(healthcheck_info.overall_status_code,
           "overall status %s" % (healthcheck_info.overall_status,), detail)