Skip to content

Commit

Permalink
Merge pull request #19744 from jan--f/mgr-prometheus-standby-mondown
Browse files Browse the repository at this point in the history
pybind/mgr/prometheus: add StandbyModule and handle failed MON cluster

Reviewed-by: John Spray <john.spray@redhat.com>
  • Loading branch information
John Spray committed Jan 22, 2018
2 parents 85847c5 + 4a45b02 commit c05d963
Show file tree
Hide file tree
Showing 2 changed files with 135 additions and 11 deletions.
65 changes: 65 additions & 0 deletions qa/tasks/mgr/test_prometheus.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@


from mgr_test_case import MgrTestCase

import logging
import requests


log = logging.getLogger(__name__)


class TestPrometheus(MgrTestCase):
    """Smoke tests for the mgr ``prometheus`` module.

    Covers the HTTP endpoints served by the active module and the
    endpoints that remain reachable on a daemon after it has been failed.
    """
    MGRS_REQUIRED = 3

    def test_standby(self):
        """Fail the active mgr and check the old URI still answers.

        After failover the module must publish a different URI, and the
        original URI must still return 200 for both the index page and
        the ``metrics`` endpoint.
        """
        self._assign_ports("prometheus", "server_port")
        self._load_module("prometheus")

        original_active = self.mgr_cluster.get_active_id()

        original_uri = self._get_uri("prometheus")
        log.info("Originally running at {0}".format(original_uri))

        self.mgr_cluster.mgr_fail(original_active)

        failed_over_uri = self._get_uri("prometheus")
        log.info("After failover running at {0}".format(failed_over_uri))

        self.assertNotEqual(original_uri, failed_over_uri)

        # The original active daemon should have come back up as a standby
        # and serve some html under "/" and an empty answer under /metrics
        r = requests.get(original_uri, allow_redirects=False)
        self.assertEqual(r.status_code, 200)
        r = requests.get(original_uri + "metrics", allow_redirects=False)
        self.assertEqual(r.status_code, 200)
        self.assertEqual(r.headers["content-type"], "text/plain;charset=utf-8")

    def test_urls(self):
        """Check that every known URL of the module answers with a 200."""
        self._assign_ports("prometheus", "server_port")
        self._load_module("prometheus")

        # test_standby appends "metrics" directly to the published URI, i.e.
        # the URI is expected to end in "/".  Drop any trailing slash here so
        # that joining with the "/"-prefixed paths below requests "/" and
        # "/metrics" rather than "//" and "//metrics".
        base_uri = self._get_uri("prometheus").rstrip("/")

        # This is a very simple smoke test to check that the module can
        # give us a 200 response to requests.  We're not testing that
        # the content is correct or even renders!

        urls = [
            "/",
            "/metrics",
        ]

        failures = []

        for url in urls:
            r = requests.get(base_uri + url, allow_redirects=False)
            if r.status_code != 200:
                failures.append(url)

            log.info("{0}: {1} ({2} bytes)".format(
                url, r.status_code, len(r.content)
            ))

        self.assertListEqual(failures, [])
81 changes: 70 additions & 11 deletions src/pybind/mgr/prometheus/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
import errno
import math
import os
import socket
from collections import OrderedDict
from mgr_module import MgrModule
from mgr_module import MgrModule, MgrStandbyModule

# Defaults for the Prometheus HTTP server. Can also be set in config-key
# see https://github.com/prometheus/prometheus/wiki/Default-port-allocations
Expand Down Expand Up @@ -137,8 +138,6 @@ class Module(MgrModule):

def __init__(self, *args, **kwargs):
super(Module, self).__init__(*args, **kwargs)
self.notified = False
self.serving = False
self.metrics = self._setup_static_metrics()
self.schema = OrderedDict()
_global_instance['plugin'] = self
Expand Down Expand Up @@ -230,10 +229,6 @@ def _setup_static_metrics(self):

return metrics

def shutdown(self):
self.serving = False
pass

def get_health(self):
health = json.loads(self.get('health')['json'])
self.metrics['health_status'].set(
Expand Down Expand Up @@ -389,10 +384,13 @@ def index(self):

@cherrypy.expose
def metrics(self):
metrics = global_instance().collect()
cherrypy.response.headers['Content-Type'] = 'text/plain'
if metrics:
return self.format_metrics(metrics)
if global_instance().have_mon_connection():
metrics = global_instance().collect()
cherrypy.response.headers['Content-Type'] = 'text/plain'
if metrics:
return self.format_metrics(metrics)
else:
raise cherrypy.HTTPError(503, 'No MON connection')

server_addr = self.get_localized_config('server_addr', DEFAULT_ADDR)
server_port = self.get_localized_config('server_port', DEFAULT_PORT)
Expand All @@ -401,11 +399,72 @@ def metrics(self):
(server_addr, server_port)
)

# Publish the URI that others may use to access the service we're
# about to start serving
self.set_uri('http://{0}:{1}/'.format(
socket.getfqdn() if server_addr == '::' else server_addr,
server_port
))

cherrypy.config.update({
'server.socket_host': server_addr,
'server.socket_port': int(server_port),
'engine.autoreload.on': False
})
cherrypy.tree.mount(Root(), "/")
self.log.info('Starting engine...')
cherrypy.engine.start()
self.log.info('Engine started.')
cherrypy.engine.block()

def shutdown(self):
    """Shut down the CherryPy engine that serves the exporter.

    Blocks until the engine reports the STARTED state, then asks it to
    exit, logging before and after.
    """
    engine = cherrypy.engine
    self.log.info('Stopping engine...')
    # Do not request the exit before the engine has finished starting.
    engine.wait(state=engine.states.STARTED)
    engine.exit()
    self.log.info('Stopped engine')


class StandbyModule(MgrStandbyModule):
    """Standby counterpart of the prometheus module.

    Serves a minimal HTTP site: an index page that links to the metrics
    endpoint of the active module, and an empty ``/metrics`` response.
    """

    def serve(self):
        """Configure and start CherryPy, then block until it is stopped.

        The listen address and port are read from the localized config
        keys ``server_addr`` (default ``'::'``) and ``server_port``
        (default ``DEFAULT_PORT``).
        """
        server_addr = self.get_localized_config('server_addr', '::')
        server_port = self.get_localized_config('server_port', DEFAULT_PORT)
        self.log.info("server_addr: %s server_port: %s" % (server_addr, server_port))
        cherrypy.config.update({
            'server.socket_host': server_addr,
            'server.socket_port': int(server_port),
            'engine.autoreload.on': False
        })

        # Give the nested handler class access to this module instance;
        # inside Root's methods ``self`` refers to the Root object.
        module = self

        class Root(object):

            @cherrypy.expose
            def index(self):
                """Return an HTML page linking to the active module's metrics."""
                active_uri = module.get_active_uri()
                return '''<!DOCTYPE html>
<html>
<head><title>Ceph Exporter</title></head>
<body>
<h1>Ceph Exporter</h1>
<p><a href='{}metrics'>Metrics</a></p>
</body>
</html>'''.format(active_uri)

            @cherrypy.expose
            def metrics(self):
                """Return an empty plain-text body."""
                cherrypy.response.headers['Content-Type'] = 'text/plain'
                return ''

        cherrypy.tree.mount(Root(), '/', {})
        self.log.info('Starting engine...')
        cherrypy.engine.start()
        self.log.info('Engine started.')
        self.log.info("Waiting for engine...")
        # Blocks until the engine reaches the STOPPED state (see shutdown()).
        cherrypy.engine.wait(state=cherrypy.engine.states.STOPPED)
        self.log.info('Engine stopped.')

    def shutdown(self):
        """Stop the CherryPy engine once it has reached the STARTED state."""
        self.log.info("Stopping engine...")
        cherrypy.engine.wait(state=cherrypy.engine.states.STARTED)
        cherrypy.engine.stop()
        self.log.info("Stopped engine")

0 comments on commit c05d963

Please sign in to comment.