mgr/cephadm: rewrite/simplify describe_service
The prior implementation first tried to fabricate services based on the
running daemons, and then filled in defined services on top.  This led
to duplication and a range of small errors.

Instead, flip this around: start with the services that are defined,
and only fill in 'unmanaged' services where we need to.
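
In outline, the new ordering looks like the following self-contained sketch (the Spec, Daemon, and Service classes and the describe_services() helper are simplified stand-ins invented for illustration, not the real ServiceSpec, DaemonDescription, orchestrator.ServiceDescription, or module code):

    from dataclasses import dataclass
    from typing import Dict, List


    @dataclass
    class Spec:                       # stand-in for ServiceSpec
        service_name: str
        target_size: int
        unmanaged: bool = False


    @dataclass
    class Daemon:                     # stand-in for DaemonDescription
        service_name: str
        running: bool


    @dataclass
    class Service:                    # stand-in for orchestrator.ServiceDescription
        spec: Spec
        size: int = 0
        running: int = 0


    def describe_services(specs: Dict[str, Spec], daemons: List[Daemon]) -> List[Service]:
        # 1) Start from the defined specs: every known service gets an entry,
        #    sized from its placement, even if nothing is deployed yet.
        sm: Dict[str, Service] = {
            name: Service(spec=spec, size=spec.target_size)
            for name, spec in specs.items()
        }
        # 2) Fold the running daemons into that map; only daemons that belong
        #    to no defined spec get a synthetic 'unmanaged' service entry.
        for dd in daemons:
            n = dd.service_name
            if n not in sm:
                sm[n] = Service(spec=Spec(service_name=n, target_size=0, unmanaged=True))
            if dd.running:
                sm[n].running += 1
        return list(sm.values())


    specs = {'rgw.foo': Spec('rgw.foo', target_size=2)}
    daemons = [Daemon('rgw.foo', True), Daemon('crash', True)]
    # -> 'rgw.foo' (size 2, running 1) plus an unmanaged 'crash' service
    print(describe_services(specs, daemons))

With this ordering a defined-but-undeployed service still appears (sized from its spec, running == 0), and a daemon with no matching spec shows up once as an unmanaged service instead of being fabricated first and patched over later.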

Drop the osd kludges and instead rely on DaemonDescription.service_id to
return the right thing.
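
The kludge in question is the branch in the daemon loop that rebuilt the service name by hand as f"osd.{dd.osdspec_affinity}". A rough sketch of what relying on the daemon description amounts to, using a hypothetical OsdDaemon stand-in rather than the real DaemonDescription API:

    from dataclasses import dataclass
    from typing import Optional


    @dataclass
    class OsdDaemon:
        # Hypothetical stand-in for the OSD-relevant bits of DaemonDescription.
        daemon_id: str                           # e.g. '3' for the daemon osd.3
        osdspec_affinity: Optional[str] = None   # name of the OSD spec that created it

        def service_id(self) -> str:
            # For OSDs the service id is the spec the daemon is pinned to
            # (its osdspec_affinity), not the daemon id itself.
            return self.osdspec_affinity or ''

        def service_name(self) -> str:
            sid = self.service_id()
            return f'osd.{sid}' if sid else 'osd'


    d = OsdDaemon(daemon_id='3', osdspec_affinity='all-available-devices')
    assert d.service_name() == 'osd.all-available-devices'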

Signed-off-by: Sage Weil <sage@newdream.net>
liewegas committed Apr 9, 2021
1 parent 5adef5f commit d4c14dc
Showing 1 changed file with 46 additions and 68 deletions.
114 changes: 46 additions & 68 deletions src/pybind/mgr/cephadm/module.py
@@ -1555,97 +1555,75 @@ def describe_service(self, service_type: Optional[str] = None, service_name: Opt
self._invalidate_daemons_and_kick_serve()
self.log.info('Kicked serve() loop to refresh all services')

# <service_map>
sm: Dict[str, orchestrator.ServiceDescription] = {}
osd_count = 0

# known services
for n, spec in self.spec_store.all_specs.items():
if service_type is not None and service_type != spec.service_type:
continue
if service_name is not None and service_name != n:
continue
sm[n] = orchestrator.ServiceDescription(
spec=spec,
size=spec.placement.get_target_count(self.inventory.all_specs()),
running=0,
events=self.events.get_for_service(spec.service_name()),
created=self.spec_store.spec_created[n],
deleted=self.spec_store.spec_deleted.get(n, None),
)
if service_type == 'nfs':
spec = cast(NFSServiceSpec, spec)
sm[n].rados_config_location = spec.rados_config_location()
if spec.service_type == 'ha-rgw':
# ha-rgw has 2 daemons running per host
sm[n].size = sm[n].size * 2

# factor daemons into status
for h, dm in self.cache.get_daemons_with_volatile_status():
for name, dd in dm.items():
assert dd.hostname is not None, f'no hostname for {dd!r}'
assert dd.daemon_type is not None, f'no daemon_type for {dd!r}'

if service_type and service_type != dd.daemon_type:
continue
n: str = dd.service_name()

if (
service_type
and service_type != n
                    and not n.startswith(service_type + '.')
):
continue
if service_name and service_name != n:
continue
if dd.daemon_type == 'osd':
"""
OSDs do not know the affinity to their spec out of the box.
"""
n = f"osd.{dd.osdspec_affinity}"
if not dd.osdspec_affinity:
# If there is no osdspec_affinity, the spec should suffice for displaying
continue
if n in self.spec_store.all_specs:
spec = self.spec_store.all_specs[n]
else:

if n not in sm:
# new unmanaged service
spec = ServiceSpec(
unmanaged=True,
service_type=daemon_type_to_service(dd.daemon_type),
service_id=dd.service_id(),
placement=PlacementSpec(
hosts=[dd.hostname]
)
)
if n not in sm:
sm[n] = orchestrator.ServiceDescription(
last_refresh=dd.last_refresh,
container_image_id=dd.container_image_id,
container_image_name=dd.container_image_name,
spec=spec,
events=self.events.get_for_service(spec.service_name()),
size=0,
)
if n in self.spec_store.all_specs:
if dd.daemon_type == 'osd':
"""
The osd count can't be determined by the Placement spec.
Showing an actual/expected representation cannot be determined
here. So we're setting running = size for now.
"""
osd_count += 1
sm[n].size = osd_count
else:
sm[n].size = spec.placement.get_target_count(
self.inventory.all_specs())

sm[n].created = self.spec_store.spec_created[n]
sm[n].deleted = self.spec_store.spec_deleted.get(n, None)

if service_type == 'nfs':
spec = cast(NFSServiceSpec, spec)
sm[n].rados_config_location = spec.rados_config_location()
else:
sm[n].size = 0

if dd.status == DaemonDescriptionStatus.running:
sm[n].running += 1
if not sm[n].last_refresh or not dd.last_refresh or dd.last_refresh < sm[n].last_refresh: # type: ignore
if dd.daemon_type == 'osd':
                    # The osd count can't be determined from the placement spec, so an
                    # actual vs. expected view isn't possible here; size simply tracks
                    # the daemon count (size == running) for now.
sm[n].size += 1
if (
not sm[n].last_refresh
or not dd.last_refresh
or dd.last_refresh < sm[n].last_refresh # type: ignore
):
sm[n].last_refresh = dd.last_refresh
if sm[n].container_image_id != dd.container_image_id:
sm[n].container_image_id = 'mix'
if sm[n].container_image_name != dd.container_image_name:
sm[n].container_image_name = 'mix'
if dd.daemon_type == 'haproxy' or dd.daemon_type == 'keepalived':
# ha-rgw has 2 daemons running per host
sm[n].size = sm[n].size * 2
for n, spec in self.spec_store.all_specs.items():
if n in sm:
continue
if service_type is not None and service_type != spec.service_type:
continue
if service_name is not None and service_name != n:
continue
sm[n] = orchestrator.ServiceDescription(
spec=spec,
size=spec.placement.get_target_count(self.inventory.all_specs()),
running=0,
events=self.events.get_for_service(spec.service_name()),
)
if service_type == 'nfs':
spec = cast(NFSServiceSpec, spec)
sm[n].rados_config_location = spec.rados_config_location()
if spec.service_type == 'ha-rgw':
# ha-rgw has 2 daemons running per host
sm[n].size = sm[n].size * 2

return list(sm.values())

@handle_orch_error