Skip to content

Commit

Permalink
ceph-exporter: cephadm changes
Browse files Browse the repository at this point in the history
Fixes: https://tracker.ceph.com/issues/55046
Signed-off-by: Avan Thakkar <athakkar@redhat.com>

Includes changes required for introducing ceph-exporter container in cephadm.

(cherry picked from commit 0253053)

Conflicts:
	src/pybind/mgr/cephadm/module.py
	src/pybind/mgr/cephadm/service_discovery.py
	src/pybind/mgr/cephadm/services/cephadmservice.py
	src/pybind/mgr/cephadm/services/monitoring.py
	src/pybind/mgr/cephadm/tests/test_services.py
	src/python-common/ceph/deployment/service_spec.py
  • Loading branch information
avanthakkar authored and adk3798 committed Feb 15, 2023
1 parent e1dad2e commit 5f04222
Show file tree
Hide file tree
Showing 12 changed files with 233 additions and 17 deletions.
72 changes: 68 additions & 4 deletions src/cephadm/cephadm
Expand Up @@ -371,7 +371,7 @@ class UnauthorizedRegistryError(Error):

class Ceph(object):
daemons = ('mon', 'mgr', 'osd', 'mds', 'rgw', 'rbd-mirror',
'crash', 'cephfs-mirror')
'crash', 'cephfs-mirror', 'ceph-exporter')

##################################

Expand Down Expand Up @@ -932,6 +932,64 @@ class CephIscsi(object):
##################################


class CephExporter(object):
    """Defines a Ceph exporter container.

    Holds the settings read from the daemon's JSON config and turns them
    into the command-line arguments passed to ``/usr/bin/ceph-exporter``.
    """

    daemon_type = 'ceph-exporter'
    entrypoint = '/usr/bin/ceph-exporter'
    DEFAULT_PORT = 9926
    port_map = {
        'ceph-exporter': DEFAULT_PORT,
    }

    def __init__(self,
                 ctx: CephadmContext,
                 fsid: str, daemon_id: Union[int, str],
                 config_json: Dict[str, Any],
                 image: str = DEFAULT_IMAGE) -> None:
        """Store the daemon identity and read exporter settings.

        :param ctx: cephadm context the daemon is being deployed with
        :param fsid: cluster fsid
        :param daemon_id: id of this ceph-exporter daemon
        :param config_json: settings dict; the keys read are ``sock-dir``,
            ``addrs``, ``port``, ``prio-limit`` and ``stats-period``
        :param image: container image to use
        :raises Error: if the configured socket directory does not exist
        """
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        self.sock_dir = config_json.get('sock-dir', '/var/run/ceph/')
        # Only resolve the local hostname when no address was configured:
        # passing the lookup as a ``dict.get`` default would run it (and
        # possibly fail with a resolver error) on every construction, even
        # when ``addrs`` is supplied explicitly.
        if 'addrs' in config_json:
            self.addrs = config_json['addrs']
        else:
            self.addrs = socket.gethostbyname(socket.gethostname())
        self.port = config_json.get('port', self.DEFAULT_PORT)
        self.prio_limit = config_json.get('prio-limit', 5)
        self.stats_period = config_json.get('stats-period', 5)

        self.validate()

    @classmethod
    def init(cls, ctx: CephadmContext, fsid: str,
             daemon_id: Union[int, str]) -> 'CephExporter':
        """Alternate constructor reading config and image from ``ctx``."""
        return cls(ctx, fsid, daemon_id,
                   get_parm(ctx.config_json), ctx.image)

    @staticmethod
    def get_container_mounts() -> Dict[str, str]:
        """Return the host-path to container-path mounts for the exporter."""
        # NOTE(review): the mount is fixed to /var/run/ceph while sock_dir is
        # configurable -- confirm a non-default sock-dir is reachable inside
        # the container.
        return {'/var/run/ceph': '/var/run/ceph:z'}

    def get_daemon_args(self) -> List[str]:
        """Return the ceph-exporter command-line arguments for this daemon."""
        return [
            f'--sock-dir={self.sock_dir}',
            f'--addrs={self.addrs}',
            f'--port={self.port}',
            f'--prio-limit={self.prio_limit}',
            f'--stats-period={self.stats_period}',
        ]

    def validate(self) -> None:
        """Raise ``Error`` unless the socket directory exists."""
        if not os.path.isdir(self.sock_dir):
            raise Error(f'Directory does not exist. Got: {self.sock_dir}')


##################################


class HAproxy(object):
"""Defines an HAproxy container"""
daemon_type = 'haproxy'
Expand Down Expand Up @@ -2551,7 +2609,7 @@ def get_daemon_args(ctx, fsid, daemon_type, daemon_id):
# type: (CephadmContext, str, str, Union[int, str]) -> List[str]
r = list() # type: List[str]

if daemon_type in Ceph.daemons and daemon_type != 'crash':
if daemon_type in Ceph.daemons and daemon_type not in ['crash', 'ceph-exporter']:
r += [
'--setuser', 'ceph',
'--setgroup', 'ceph',
Expand Down Expand Up @@ -2618,6 +2676,9 @@ def get_daemon_args(ctx, fsid, daemon_type, daemon_id):
elif daemon_type == NFSGanesha.daemon_type:
nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
r += nfs_ganesha.get_daemon_args()
elif daemon_type == CephExporter.daemon_type:
ceph_exporter = CephExporter.init(ctx, fsid, daemon_id)
r.extend(ceph_exporter.get_daemon_args())
elif daemon_type == HAproxy.daemon_type:
haproxy = HAproxy.init(ctx, fsid, daemon_id)
r += haproxy.get_daemon_args()
Expand Down Expand Up @@ -2879,7 +2940,7 @@ def get_container_mounts(ctx, fsid, daemon_type, daemon_id,
mounts[data_dir] = cdata_dir + ':z'
if not no_config:
mounts[data_dir + '/config'] = '/etc/ceph/ceph.conf:z'
if daemon_type in ['rbd-mirror', 'cephfs-mirror', 'crash']:
if daemon_type in ['rbd-mirror', 'cephfs-mirror', 'crash', 'ceph-exporter']:
# these do not search for their keyrings in a data directory
mounts[data_dir + '/keyring'] = '/etc/ceph/ceph.client.%s.%s.keyring' % (daemon_type, daemon_id)

Expand Down Expand Up @@ -3038,6 +3099,9 @@ def get_container(ctx: CephadmContext,
entrypoint = NFSGanesha.entrypoint
name = '%s.%s' % (daemon_type, daemon_id)
envs.extend(NFSGanesha.get_container_envs())
elif daemon_type == CephExporter.daemon_type:
entrypoint = CephExporter.entrypoint
name = 'client.ceph-exporter.%s' % daemon_id
elif daemon_type == HAproxy.daemon_type:
name = '%s.%s' % (daemon_type, daemon_id)
container_args.extend(['--user=root']) # haproxy 2.4 defaults to a different user
Expand Down Expand Up @@ -5156,7 +5220,7 @@ def prepare_ssh(
cli(['orch', 'apply', 'crash'])

if not ctx.skip_monitoring_stack:
for t in ['prometheus', 'grafana', 'node-exporter', 'alertmanager']:
for t in ['ceph-exporter', 'prometheus', 'grafana', 'node-exporter', 'alertmanager']:
logger.info('Deploying %s service with default placement...' % t)
cli(['orch', 'apply', t])

Expand Down
17 changes: 16 additions & 1 deletion src/pybind/mgr/cephadm/agent.py
Expand Up @@ -15,7 +15,7 @@
from ceph.utils import datetime_now
from ceph.deployment.inventory import Devices
from ceph.deployment.service_spec import ServiceSpec, PlacementSpec
from cephadm.services.cephadmservice import CephadmDaemonDeploySpec
from cephadm.services.cephadmservice import CephadmDaemonDeploySpec, CephExporterService
from cephadm.services.ingress import IngressSpec

from datetime import datetime, timedelta
Expand Down Expand Up @@ -183,6 +183,8 @@ def get_sd_config(self, service: str) -> List[Dict[str, Collection[str]]]:
return self.node_exporter_sd_config()
elif service == 'haproxy':
return self.haproxy_sd_config()
elif service == 'ceph-exporter':
return self.ceph_exporter_sd_config()
else:
return []

Expand Down Expand Up @@ -237,6 +239,19 @@ def haproxy_sd_config(self) -> List[Dict[str, Collection[str]]]:
})
return srv_entries

def ceph_exporter_sd_config(self) -> List[Dict[str, Collection[str]]]:
    """Build the prometheus <http_sd_config> entries for ceph-exporter.

    One entry is produced per daemon returned by the cache for the
    'ceph-exporter' service. Each entry carries a single ``host:port``
    target and an ``instance`` label set to the daemon's hostname.
    """
    entries: List[Dict[str, Collection[str]]] = []
    for daemon in self.mgr.cache.get_daemons_by_service('ceph-exporter'):
        assert daemon.hostname is not None
        # Prefer the daemon's recorded IP; otherwise ask the inventory.
        if daemon.ip:
            host = daemon.ip
        else:
            host = self.mgr.inventory.get_addr(daemon.hostname)
        # Fall back to the service default when no port is recorded.
        if daemon.ports:
            port = daemon.ports[0]
        else:
            port = CephExporterService.DEFAULT_SERVICE_PORT
        target = build_url(host=host, port=port).lstrip('/')
        entries.append({
            'targets': [target],
            'labels': {'instance': daemon.hostname}
        })
    return entries

@cherrypy.expose(alias='prometheus/rules')
def get_prometheus_rules(self) -> str:
"""Return currently configured prometheus rules as Yaml."""
Expand Down
14 changes: 10 additions & 4 deletions src/pybind/mgr/cephadm/module.py
Expand Up @@ -47,7 +47,8 @@
from . import ssh
from .migrations import Migrations
from .services.cephadmservice import MonService, MgrService, MdsService, RgwService, \
RbdMirrorService, CrashService, CephadmService, CephfsMirrorService, CephadmAgent
RbdMirrorService, CrashService, CephadmService, CephfsMirrorService, CephadmAgent, \
CephExporterService
from .services.ingress import IngressService
from .services.container import CustomContainerService
from .services.iscsi import IscsiService
Expand Down Expand Up @@ -538,7 +539,7 @@ def __init__(self, *args: Any, **kwargs: Any):
RgwService, RbdMirrorService, GrafanaService, AlertmanagerService,
PrometheusService, NodeExporterService, LokiService, PromtailService, CrashService, IscsiService,
IngressService, CustomContainerService, CephfsMirrorService,
CephadmAgent, SNMPGatewayService
CephadmAgent, SNMPGatewayService, CephExporterService
]

# https://github.com/python/mypy/issues/8993
Expand Down Expand Up @@ -691,7 +692,7 @@ def get_unique_name(
Generate a unique random service name
"""
suffix = daemon_type not in [
'mon', 'crash',
'mon', 'crash', 'ceph-exporter',
'prometheus', 'node-exporter', 'grafana', 'alertmanager',
'container', 'agent', 'snmp-gateway', 'loki', 'promtail'
]
Expand Down Expand Up @@ -2432,7 +2433,7 @@ def _calc_daemon_deps(self,
deps = [self.get_mgr_ip()]
else:
need = {
'prometheus': ['mgr', 'alertmanager', 'node-exporter', 'ingress'],
'prometheus': ['mgr', 'alertmanager', 'node-exporter', 'ingress', 'ceph-exporter'],
'grafana': ['prometheus', 'loki'],
'alertmanager': ['mgr', 'alertmanager', 'snmp-gateway'],
'promtail': ['loki'],
Expand Down Expand Up @@ -2730,6 +2731,7 @@ def _apply_service_spec(self, spec: ServiceSpec) -> str:
'alertmanager': PlacementSpec(count=1),
'prometheus': PlacementSpec(count=1),
'node-exporter': PlacementSpec(host_pattern='*'),
'ceph-exporter': PlacementSpec(host_pattern='*'),
'loki': PlacementSpec(count=1),
'promtail': PlacementSpec(host_pattern='*'),
'crash': PlacementSpec(host_pattern='*'),
Expand Down Expand Up @@ -2840,6 +2842,10 @@ def apply_promtail(self, spec: ServiceSpec) -> str:
def apply_node_exporter(self, spec: ServiceSpec) -> str:
return self._apply(spec)

@handle_orch_error
def apply_ceph_exporter(self, spec: ServiceSpec) -> str:
    """Pass ``spec`` to ``self._apply`` and return its result.

    Has the same shape as the neighbouring ``apply_node_exporter`` and
    ``apply_crash`` handlers.
    """
    return self._apply(spec)

@handle_orch_error
def apply_crash(self, spec: ServiceSpec) -> str:
return self._apply(spec)
Expand Down
35 changes: 31 additions & 4 deletions src/pybind/mgr/cephadm/services/cephadmservice.py
Expand Up @@ -10,9 +10,9 @@

from mgr_module import HandleCommandResult, MonCommandFailed

from ceph.deployment.service_spec import ServiceSpec, RGWSpec
from ceph.deployment.service_spec import ServiceSpec, RGWSpec, CephExporterSpec
from ceph.deployment.utils import is_ipv6, unwrap_ipv6
from mgr_util import build_url
from mgr_util import build_url, merge_dicts
from orchestrator import OrchestratorError, DaemonDescription, DaemonDescriptionStatus
from orchestrator._interface import daemon_type_to_service
from cephadm import utils
Expand All @@ -32,7 +32,7 @@ def get_auth_entity(daemon_type: str, daemon_id: str, host: str = "") -> AuthEnt
"""
# despite this mapping entity names to daemons, self.TYPE within
# the CephService class refers to service types, not daemon types
if daemon_type in ['rgw', 'rbd-mirror', 'cephfs-mirror', 'nfs', "iscsi", 'ingress']:
if daemon_type in ['rgw', 'rbd-mirror', 'cephfs-mirror', 'nfs', "iscsi", 'ingress', 'ceph-exporter']:
return AuthEntity(f'client.{daemon_type}.{daemon_id}')
elif daemon_type in ['crash', 'agent']:
if host == "":
Expand Down Expand Up @@ -513,7 +513,6 @@ def get_config_and_keyring(self,
'prefix': 'auth get',
'entity': entity,
})

config = self.mgr.get_minimal_ceph_conf()

if extra_ceph_config:
Expand Down Expand Up @@ -1040,6 +1039,34 @@ def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonD
return daemon_spec


class CephExporterService(CephService):
    """Service handler for 'ceph-exporter' daemons."""

    TYPE = 'ceph-exporter'
    DEFAULT_SERVICE_PORT = 9926

    def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
        """Fill in keyring, config and deps on ``daemon_spec``.

        Exporter options set on the stored service spec (sock_dir, port,
        prio_limit, stats_period) are merged into the generated config
        under the keys 'sock-dir', 'port', 'prio-limit' and 'stats-period'.
        """
        assert self.TYPE == daemon_spec.daemon_type
        spec = cast(CephExporterSpec, self.mgr.spec_store[daemon_spec.service_name].spec)

        entity = self.get_auth_entity(daemon_spec.daemon_id)
        caps = ['mon', 'profile ceph-exporter',
                'mon', 'allow r',
                'mgr', 'allow r',
                'osd', 'allow r']
        keyring = self.get_keyring_with_caps(entity, caps)

        # Only options set on the spec are forwarded; numeric values are
        # passed as strings.
        exporter_config = {}
        if spec.sock_dir:
            exporter_config['sock-dir'] = spec.sock_dir
        if spec.port:
            exporter_config['port'] = f'{spec.port}'
        if spec.prio_limit is not None:
            exporter_config['prio-limit'] = f'{spec.prio_limit}'
        if spec.stats_period:
            exporter_config['stats-period'] = f'{spec.stats_period}'

        daemon_spec.keyring = keyring
        generated_config, deps = self.generate_config(daemon_spec)
        daemon_spec.final_config, daemon_spec.deps = generated_config, deps
        daemon_spec.final_config = merge_dicts(daemon_spec.final_config, exporter_config)
        return daemon_spec


class CephfsMirrorService(CephService):
TYPE = 'cephfs-mirror'

Expand Down
13 changes: 13 additions & 0 deletions src/pybind/mgr/cephadm/services/monitoring.py
Expand Up @@ -371,11 +371,24 @@ def generate_config(
"service": dd.service_name(),
})

# scrape ceph-exporters
ceph_exporter_targets = []
for dd in self.mgr.cache.get_daemons_by_service('ceph-exporter'):
assert dd.hostname is not None
deps.append(dd.name())
addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
port = dd.ports[0] if dd.ports else 9926
ceph_exporter_targets.append({
'url': build_url(host=addr, port=port).lstrip('/'),
'hostname': dd.hostname
})

# generate the prometheus configuration
context = {
'alertmgr_targets': alertmgr_targets,
'mgr_scrape_list': mgr_scrape_list,
'haproxy_targets': haproxy_targets,
'ceph_exporter_targets': ceph_exporter_targets,
'nodes': nodes,
}
r: Dict[str, Any] = {
Expand Down
Expand Up @@ -39,3 +39,14 @@ scrape_configs:
instance: '{{ haproxy.service }}'
{% endfor %}
{% endif %}

{% if ceph_exporter_targets %}
- job_name: 'ceph-exporter'
honor_labels: true
static_configs:
{% for ceph_exporter in ceph_exporter_targets %}
- targets: ['{{ ceph_exporter.url }}']
labels:
instance: '{{ ceph_exporter.hostname }}'
{% endfor %}
{% endif %}
18 changes: 18 additions & 0 deletions src/pybind/mgr/cephadm/tests/test_agent.py
Expand Up @@ -16,6 +16,10 @@ def service_name(self):

class FakeCache:
def get_daemons_by_service(self, service_type):
if service_type == 'ceph-exporter':
return [FakeDaemonDescription('1.2.3.4', [9926], 'node0'),
FakeDaemonDescription('1.2.3.5', [9926], 'node1')]

return [FakeDaemonDescription('1.2.3.4', [9100], 'node0'),
FakeDaemonDescription('1.2.3.5', [9200], 'node1')]

Expand Down Expand Up @@ -150,6 +154,20 @@ def test_get_sd_config_haproxy(self):
assert cfg[0]['targets'] == ['1.2.3.4:9049']
assert cfg[0]['labels'] == {'instance': 'ingress'}

def test_get_sd_config_ceph_exporter(self):
    """The 'ceph-exporter' sd config has well-formed entries and targets."""
    sd_root = Root(FakeMgr())
    cfg = sd_root.get_sd_config('ceph-exporter')

    # structure: non-empty, and every entry carries both keys
    assert cfg
    assert all('labels' in entry and 'targets' in entry for entry in cfg)

    # content: first entry points at the first fake daemon
    first_targets = cfg[0]['targets']
    assert first_targets == ['1.2.3.4:9926']

def test_get_sd_config_invalid_service(self):
mgr = FakeMgr()
root = Root(mgr)
Expand Down
11 changes: 10 additions & 1 deletion src/pybind/mgr/cephadm/tests/test_services.py
Expand Up @@ -16,7 +16,8 @@
NodeExporterService, LokiService, PromtailService
from cephadm.module import CephadmOrchestrator
from ceph.deployment.service_spec import IscsiServiceSpec, MonitoringSpec, AlertManagerSpec, \
ServiceSpec, RGWSpec, GrafanaSpec, SNMPGatewaySpec, IngressSpec, PlacementSpec, PrometheusSpec
ServiceSpec, RGWSpec, GrafanaSpec, SNMPGatewaySpec, IngressSpec, PlacementSpec, \
PrometheusSpec, CephExporterSpec
from cephadm.tests.fixtures import with_host, with_service, _run_cephadm, async_side_effect

from orchestrator import OrchestratorError
Expand Down Expand Up @@ -396,6 +397,7 @@ def test_prometheus_config(self, _run_cephadm, cephadm_module: CephadmOrchestrat

with with_host(cephadm_module, 'test'):
with with_service(cephadm_module, MonitoringSpec('node-exporter')) as _, \
with_service(cephadm_module, CephExporterSpec('ceph-exporter')) as _, \
with_service(cephadm_module, PrometheusSpec('prometheus')) as _:

y = dedent("""
Expand All @@ -418,6 +420,13 @@ def test_prometheus_config(self, _run_cephadm, cephadm_module: CephadmOrchestrat
labels:
instance: 'test'
- job_name: 'ceph-exporter'
honor_labels: true
static_configs:
- targets: ['[1::4]:9926']
labels:
instance: 'test'
""").lstrip()

_run_cephadm.assert_called_with(
Expand Down

0 comments on commit 5f04222

Please sign in to comment.