mgr/cephadm: automatically configure dashboard <-> rgw connection
Automatically configure the dashboard to talk to RGW.

This isn't perfect:
 - we semi-arbitrarily pick the oldest RGW daemon, in the hope that it
   will see the least amount of turnover.
 - we always disable certificate verification

Fixes: https://tracker.ceph.com/issues/44605
Signed-off-by: Sage Weil <sage@newdream.net>
liewegas committed Jun 1, 2021
1 parent 0331281 commit a69ea64
Showing 4 changed files with 134 additions and 22 deletions.
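
The change works as a trigger-and-wake pattern: RGW daemon creation, removal, and dashboard-config events only set a flag and wake the cephadm serve loop, which performs the actual dashboard reconfiguration on its next pass, and only when the dashboard mgr module is enabled. Below is a minimal, self-contained sketch of that pattern; DummyMgr and serve_once are hypothetical names used for illustration, not cephadm's real classes.

import threading


class DummyMgr:
    """Hypothetical stand-in for the cephadm mgr module (illustration only)."""

    def __init__(self) -> None:
        self.event = threading.Event()             # wakes the serve loop
        self.need_connect_dashboard_rgw = False    # deferred-work flag

    def trigger_connect_dashboard_rgw(self) -> None:
        # Cheap to call from anywhere (daemon create/remove, config_dashboard);
        # the real work is deferred to the serve loop.
        self.need_connect_dashboard_rgw = True
        self.event.set()

    def serve_once(self) -> None:
        # One pass of a serve loop: wake up, consume the flag, do the work.
        self.event.wait(timeout=60)
        self.event.clear()
        if self.need_connect_dashboard_rgw:
            self.need_connect_dashboard_rgw = False
            print('would point the dashboard at the current RGW endpoint here')


mgr = DummyMgr()
mgr.trigger_connect_dashboard_rgw()
mgr.serve_once()
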
5 changes: 5 additions & 0 deletions src/pybind/mgr/cephadm/module.py
@@ -466,6 +466,7 @@ def __init__(self, *args: Any, **kwargs: Any):
self.template = TemplateMgr(self)

self.requires_post_actions: Set[str] = set()
self.need_connect_dashboard_rgw = False

self.config_checker = CephadmConfigChecks(self)

@@ -2546,3 +2547,7 @@ def remove_osds_status(self) -> List[OSD]:
The CLI call to retrieve an osd removal report
"""
return self.to_remove_osds.all_osds()

def trigger_connect_dashboard_rgw(self) -> None:
    self.need_connect_dashboard_rgw = True
    self.event.set()
13 changes: 9 additions & 4 deletions src/pybind/mgr/cephadm/serve.py
@@ -38,6 +38,8 @@

logger = logging.getLogger(__name__)

REQUIRES_POST_ACTIONS = ['grafana', 'iscsi', 'prometheus', 'alertmanager', 'nfs', 'rgw']


class CephadmServe:
"""
@@ -79,6 +81,11 @@ def serve(self) -> None:

self._update_paused_health()

if self.mgr.need_connect_dashboard_rgw and self.mgr.config_dashboard:
    self.mgr.need_connect_dashboard_rgw = False
    if 'dashboard' in self.mgr.get('mgr_map')['modules']:
        self.mgr.cephadm_services['rgw'].connect_dashboard_rgw()

if not self.mgr.paused:
self.mgr.to_remove_osds.process_removal_queue()

@@ -878,7 +885,7 @@ def _check_daemons(self) -> None:
continue

# These daemon types require additional configs after creation
- if dd.daemon_type in ['grafana', 'iscsi', 'prometheus', 'alertmanager', 'nfs']:
+ if dd.daemon_type in REQUIRES_POST_ACTIONS:
daemons_post[dd.daemon_type].append(dd)

if self.mgr.cephadm_services[daemon_type_to_service(dd.daemon_type)].get_active_daemon(
@@ -1055,9 +1062,7 @@ def _create_daemon(self,
sd = daemon_spec.to_daemon_description(
DaemonDescriptionStatus.running, 'starting')
self.mgr.cache.add_daemon(daemon_spec.host, sd)
- if daemon_spec.daemon_type in [
-     'grafana', 'iscsi', 'prometheus', 'alertmanager'
- ]:
+ if daemon_spec.daemon_type in REQUIRES_POST_ACTIONS:
self.mgr.requires_post_actions.add(daemon_spec.daemon_type)
self.mgr.cache.invalidate_host_daemons(daemon_spec.host)

103 changes: 102 additions & 1 deletion src/pybind/mgr/cephadm/services/cephadmservice.py
@@ -2,6 +2,7 @@
import json
import logging
import re
import subprocess
from abc import ABCMeta, abstractmethod
from typing import TYPE_CHECKING, List, Callable, TypeVar, \
Optional, Dict, Any, Tuple, NewType, cast
@@ -10,7 +11,7 @@

from ceph.deployment.service_spec import ServiceSpec, RGWSpec
from ceph.deployment.utils import is_ipv6, unwrap_ipv6
- from orchestrator import OrchestratorError, DaemonDescription, DaemonDescriptionStatus
+ from orchestrator import OrchestratorError, DaemonDescription, DaemonDescriptionStatus, raise_if_exception
from orchestrator._interface import daemon_type_to_service
from cephadm import utils

@@ -882,6 +883,7 @@ def post_remove(self, daemon: DaemonDescription) -> None:
'who': utils.name_to_config_section(daemon.name()),
'name': 'rgw_frontends',
})
self.mgr.trigger_connect_dashboard_rgw()

def ok_to_stop(
self,
@@ -918,6 +920,105 @@ def ingress_present() -> bool:
warn_message = "WARNING: Removing RGW daemons can cause clients to lose connectivity. "
return HandleCommandResult(-errno.EBUSY, '', warn_message)

def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
    self.mgr.trigger_connect_dashboard_rgw()

def connect_dashboard_rgw(self) -> None:
    """
    Configure the dashboard to talk to RGW
    """
    self.mgr.log.info('Checking dashboard <-> RGW connection')

    def radosgw_admin(args: List[str]) -> Tuple[int, str, str]:
        try:
            result = subprocess.run(
                [
                    'radosgw-admin',
                    '-c', str(self.mgr.get_ceph_conf_path()),
                    '-k', str(self.mgr.get_ceph_option('keyring')),
                    '-n', f'mgr.{self.mgr.get_mgr_id()}',
                ] + args,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                timeout=10,
            )
            return result.returncode, result.stdout.decode('utf-8'), result.stderr.decode('utf-8')
        except subprocess.CalledProcessError as ex:
            self.mgr.log.error(f'Error executing radosgw-admin {ex.cmd}: {ex.output}')
            raise
        except subprocess.TimeoutExpired as ex:
            self.mgr.log.error(f'Timeout (10s) executing radosgw-admin {ex.cmd}')
            raise

    def get_secrets(out: str) -> Tuple[Optional[str], Optional[str]]:
        r = json.loads(out)
        for k in r.get('keys', []):
            if k.get('user') == user and r.get('system') in ['true', True]:
                access_key = k.get('access_key')
                secret_key = k.get('secret_key')
                return access_key, secret_key
        return None, None

    def update_dashboard(what: str, value: str) -> None:
        _, out, _ = self.mgr.check_mon_command({'prefix': f'dashboard get-{what}'})
        if out.strip() != value:
            if what.endswith('-key'):
                self.mgr.check_mon_command(
                    {'prefix': f'dashboard set-{what}'},
                    inbuf=value
                )
            else:
                self.mgr.check_mon_command({'prefix': f'dashboard set-{what}',
                                            "value": value})
            self.mgr.log.info(f'Updated dashboard {what}')

    completion = self.mgr.list_daemons(daemon_type='rgw')
    raise_if_exception(completion)
    daemons = completion.result
    if not daemons:
        self.mgr.log.info('No remaining RGW daemons; disconnecting dashboard')
        self.mgr.check_mon_command({'prefix': 'dashboard reset-rgw-api-host'})
        self.mgr.check_mon_command({'prefix': 'dashboard reset-rgw-api-port'})
        self.mgr.check_mon_command({'prefix': 'dashboard reset-rgw-api-scheme'})
        return

    # set up dashboard creds
    user = 'dashboard'
    access_key = None
    secret_key = None
    rc, out, _ = radosgw_admin(['user', 'info', '--uid', user])
    if not rc:
        access_key, secret_key = get_secrets(out)
    if not access_key:
        rc, out, err = radosgw_admin([
            'user', 'create', '--uid', user, '--display-name', 'Ceph Dashboard',
            '--system',
        ])
        if not rc:
            access_key, secret_key = get_secrets(out)
        if not access_key:
            self.mgr.log.error(f'Unable to create rgw {user} user: {err}')
    assert access_key
    assert secret_key
    update_dashboard('rgw-api-access-key', access_key)
    update_dashboard('rgw-api-secret-key', secret_key)

    # configure rgw endpoint using the oldest rgw daemon
    # FIXME: we should perhaps check if the old value references a daemon that
    # still exists and is up and, if so, leave this be.
    daemons.sort(key=lambda x: x.created)  # oldest first
    dd = daemons[0]
    assert dd.hostname
    self.mgr.log.info(f'Connecting dashboard to {dd.name()}')
    spec = cast(RGWSpec, self.mgr.spec_store[dd.service_name()].spec)
    port = dd.ports[0] if dd.ports else 80
    host = dd.ip or self.mgr.inventory.get_addr(dd.hostname) or dd.hostname
    proto = 'https' if spec.ssl else 'http'
    update_dashboard('rgw-api-host', host)
    update_dashboard('rgw-api-port', str(port))
    update_dashboard('rgw-api-scheme', proto)
    update_dashboard('rgw-api-ssl-verify', 'False')  # TODO: detect self-signedness


class RbdMirrorService(CephService):
TYPE = 'rbd-mirror'
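
For reference, connect_dashboard_rgw() above relies on radosgw-admin 'user info' / 'user create --system' printing JSON with a top-level 'system' flag and a 'keys' array of access/secret pairs, which get_secrets() filters for the dashboard system user. Here is a standalone sketch of that selection step; the sample payload is illustrative only (trimmed to the fields the helper reads), not real radosgw-admin output.

import json
from typing import Optional, Tuple

# Illustrative payload only -- not captured from a real cluster.
SAMPLE_USER_INFO = '''{
    "user_id": "dashboard",
    "system": "true",
    "keys": [
        {"user": "dashboard", "access_key": "EXAMPLEACCESSKEY", "secret_key": "examplesecretkey"}
    ]
}'''


def pick_system_keys(out: str, user: str = 'dashboard') -> Tuple[Optional[str], Optional[str]]:
    # Same selection rule as get_secrets(): only accept a key that belongs to
    # the expected uid, and only if the account is flagged as a system user.
    r = json.loads(out)
    if r.get('system') not in ['true', True]:
        return None, None
    for k in r.get('keys', []):
        if k.get('user') == user:
            return k.get('access_key'), k.get('secret_key')
    return None, None


print(pick_system_keys(SAMPLE_USER_INFO))  # ('EXAMPLEACCESSKEY', 'examplesecretkey')
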
35 changes: 18 additions & 17 deletions src/pybind/mgr/cephadm/tests/fixtures.py
@@ -98,28 +98,29 @@ def assert_rm_service(cephadm: CephadmOrchestrator, srv_name):

@contextmanager
def with_service(cephadm_module: CephadmOrchestrator, spec: ServiceSpec, meth=None, host: str = '') -> Iterator[List[str]]:
-    if spec.placement.is_empty() and host:
-        spec.placement = PlacementSpec(hosts=[host], count=1)
-    if meth is not None:
-        c = meth(cephadm_module, spec)
-        assert wait(cephadm_module, c) == f'Scheduled {spec.service_name()} update...'
-    else:
-        c = cephadm_module.apply([spec])
-        assert wait(cephadm_module, c) == [f'Scheduled {spec.service_name()} update...']
with mock.patch("cephadm.module.CephadmOrchestrator.connect_dashboard_rgw"):
+        if spec.placement.is_empty() and host:
+            spec.placement = PlacementSpec(hosts=[host], count=1)
+        if meth is not None:
+            c = meth(cephadm_module, spec)
+            assert wait(cephadm_module, c) == f'Scheduled {spec.service_name()} update...'
+        else:
+            c = cephadm_module.apply([spec])
+            assert wait(cephadm_module, c) == [f'Scheduled {spec.service_name()} update...']

-    specs = [d.spec for d in wait(cephadm_module, cephadm_module.describe_service())]
-    assert spec in specs
+        specs = [d.spec for d in wait(cephadm_module, cephadm_module.describe_service())]
+        assert spec in specs

-    CephadmServe(cephadm_module)._apply_all_services()
+        CephadmServe(cephadm_module)._apply_all_services()

-    dds = wait(cephadm_module, cephadm_module.list_daemons())
-    own_dds = [dd for dd in dds if dd.service_name() == spec.service_name()]
-    if host:
-        assert own_dds
+        dds = wait(cephadm_module, cephadm_module.list_daemons())
+        own_dds = [dd for dd in dds if dd.service_name() == spec.service_name()]
+        if host:
+            assert own_dds

-    yield [dd.name() for dd in own_dds]
+        yield [dd.name() for dd in own_dds]

-    assert_rm_service(cephadm_module, spec.service_name())
+        assert_rm_service(cephadm_module, spec.service_name())


def _deploy_cephadm_binary(host):