Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

reef: mgr/prometheus: fix orch check to prevent Prometheus crash #55491

Merged
merged 1 commit into from Feb 8, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
24 changes: 14 additions & 10 deletions src/pybind/mgr/prometheus/module.py
Expand Up @@ -13,7 +13,7 @@

from mgr_module import CLIReadCommand, MgrModule, MgrStandbyModule, PG_STATES, Option, ServiceInfoT, HandleCommandResult, CLIWriteCommand
from mgr_util import get_default_addr, profile_method, build_url
from orchestrator import OrchestratorClientMixin, raise_if_exception, NoOrchestrator
from orchestrator import OrchestratorClientMixin, raise_if_exception, OrchestratorError
from rbd import RBD

from typing import DefaultDict, Optional, Dict, Any, Set, cast, Tuple, Union, List, Callable
Expand Down Expand Up @@ -646,8 +646,6 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
_global_instance = self
self.metrics_thread = MetricCollectionThread(_global_instance)
self.health_history = HealthHistory(self)
self.modify_instance_id = self.get_orch_status() and self.get_module_option(
'exclude_perf_counters')

def _setup_static_metrics(self) -> Dict[str, Metric]:
metrics = {}
Expand Down Expand Up @@ -864,10 +862,12 @@ def _setup_static_metrics(self) -> Dict[str, Metric]:

return metrics

def get_orch_status(self) -> bool:
def orch_is_available(self) -> bool:
try:
return self.available()[0]
except NoOrchestrator:
except (RuntimeError, OrchestratorError, ImportError):
# An ImportError could happen during startup in case the
# orchestrator has not been loaded yet by the mgr
return False

def get_server_addr(self) -> str:
Expand Down Expand Up @@ -1292,18 +1292,22 @@ def _get_pool_info(pool: Dict[str, Any]) -> Tuple[str, str]:
# Populate other servers metadata
# If the orchestrator is available and ceph-exporter is running, modify the rgw instance id
# to match the one from the exporter
if self.modify_instance_id:
modify_instance_id = self.orch_is_available() and self.get_module_option('exclude_perf_counters')
if modify_instance_id:
daemons = raise_if_exception(self.list_daemons(daemon_type='rgw'))
for daemon in daemons:
if daemon.daemon_id and '.' in daemon.daemon_id:
instance_id = daemon.daemon_id.split(".")[2]
else:
instance_id = daemon.daemon_id if daemon.daemon_id else ""
self.metrics['rgw_metadata'].set(1,
('{}.{}'.format(str(daemon.daemon_type),
str(daemon.daemon_id)),
(f"{daemon.daemon_type}.{daemon.daemon_id}",
str(daemon.hostname),
str(daemon.version),
str(daemon.daemon_id).split(".")[2]))
instance_id))
for key, value in servers.items():
service_id, service_type = key
if service_type == 'rgw' and not self.modify_instance_id:
if service_type == 'rgw' and not modify_instance_id:
hostname, version, name = value
self.metrics['rgw_metadata'].set(
1,
Expand Down