From fd99fa3047e82e3f1c9c09cc42422e74e73b8688 Mon Sep 17 00:00:00 2001
From: Redouane Kachach
Date: Wed, 24 Aug 2022 13:57:50 +0200
Subject: [PATCH] mgr/cephadm: validating tuned profile specification

fixes: https://tracker.ceph.com/issues/57192
fixes: https://tracker.ceph.com/issues/57191

Signed-off-by: Redouane Kachach
---
 src/cephadm/cephadm                           |  10 ++
 src/pybind/mgr/cephadm/module.py              |  62 +++++++++-
 .../mgr/cephadm/services/cephadmservice.py    |   3 +-
 src/pybind/mgr/cephadm/tests/test_cephadm.py  | 116 ++++++++++++++++++
 src/python-common/ceph/deployment/hostspec.py |  10 +-
 5 files changed, 195 insertions(+), 6 deletions(-)

diff --git a/src/cephadm/cephadm b/src/cephadm/cephadm
index fa03cb6441c226..8d7d583a262b90 100755
--- a/src/cephadm/cephadm
+++ b/src/cephadm/cephadm
@@ -8305,6 +8305,7 @@ class HostFacts():
     def __init__(self, ctx: CephadmContext):
         self.ctx: CephadmContext = ctx
         self.cpu_model: str = 'Unknown'
+        self.sysctl_options: Dict[str, str] = self._populate_sysctl_options()
         self.cpu_count: int = 0
         self.cpu_cores: int = 0
         self.cpu_threads: int = 0
@@ -8319,6 +8320,15 @@ class HostFacts():
         self._block_devices = self._get_block_devs()
         self._device_list = self._get_device_info()
 
+    def _populate_sysctl_options(self) -> Dict[str, str]:
+        sysctl_options = {}
+        out, _, _ = call_throws(self.ctx, ['sysctl', '-a'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
+        if out:
+            for line in out.splitlines():
+                # a sysctl value may itself contain '=', so only split on the first one
+                option, _, value = line.partition('=')
+                if option:
+                    sysctl_options[option.strip()] = value.strip()
+        return sysctl_options
+
     def _discover_enclosures(self) -> Dict[str, Enclosure]:
         """Build a dictionary of discovered scsi enclosures
 
diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py
index e3d90f13aebd24..fd72fa938f2ea4 100644
--- a/src/pybind/mgr/cephadm/module.py
+++ b/src/pybind/mgr/cephadm/module.py
@@ -2008,7 +2008,8 @@ def _daemon_action(self,
         else:
             # for OSDs, we still need to update config, just not carry out the full
             # prepare_create function
-            daemon_spec.final_config, daemon_spec.deps = self.osd_service.generate_config(daemon_spec)
+            daemon_spec.final_config, daemon_spec.deps = self.osd_service.generate_config(
+                daemon_spec)
             return self.wait_async(CephadmServe(self)._create_daemon(daemon_spec, reconfig=(action == 'reconfig')))
 
         actions = {
@@ -2535,13 +2536,70 @@ def _apply(self, spec: GenericSpec) -> str:
 
         return self._apply_service_spec(cast(ServiceSpec, spec))
 
+    def _get_candidate_hosts(self, placement: PlacementSpec) -> List[str]:
+        """Return a list of candidate hosts according to the placement specification."""
+        all_hosts = self.cache.get_schedulable_hosts()
+        draining_hosts = [dh.hostname for dh in self.cache.get_draining_hosts()]
+        candidates = []
+        if placement.hosts:
+            candidates = [h.hostname for h in placement.hosts]
+        elif placement.label:
+            candidates = [h.hostname for h in all_hosts if placement.label in h.labels]
+        elif placement.host_pattern:
+            candidates = placement.filter_matching_hostspecs(all_hosts)
+        elif (placement.count is not None or placement.count_per_host is not None):
+            candidates = [h.hostname for h in all_hosts]
+        return [h for h in candidates if h not in draining_hosts]
+
+    def _validate_one_shot_placement_spec(self, spec: PlacementSpec) -> None:
+        """Validate placement specification for TunedProfileSpec and ClientKeyringSpec."""
+        if spec.count is not None:
+            raise OrchestratorError(
+                "Placement 'count' field is not supported for this specification.")
+        if spec.count_per_host is not None:
+            raise OrchestratorError(
+                "Placement 'count_per_host' field is not supported for this specification.")
+        if spec.hosts:
+            all_hosts = [h.hostname for h in self.inventory.all_specs()]
+            invalid_hosts = [h.hostname for h in spec.hosts if h.hostname not in all_hosts]
+            if invalid_hosts:
+                raise OrchestratorError(f"Found invalid host(s) in placement section: {invalid_hosts}. "
+                                        "Please check 'ceph orch host ls' for available hosts.")
+        elif not self._get_candidate_hosts(spec):
+            raise OrchestratorError("Invalid placement specification. No host(s) matched placement spec.\n"
+                                    "Please check 'ceph orch host ls' for available hosts.\n"
+                                    "Note: draining hosts are excluded from the candidate list.")
+
+    def _validate_tunedprofile_settings(self, spec: TunedProfileSpec) -> Dict[str, List[str]]:
+        candidate_hosts = spec.placement.filter_matching_hostspecs(self.inventory.all_specs())
+        invalid_options: Dict[str, List[str]] = {}
+        for host in candidate_hosts:
+            host_sysctl_options = self.cache.get_facts(host).get('sysctl_options', {})
+            invalid_options[host] = []
+            for option in spec.settings:
+                if option not in host_sysctl_options:
+                    invalid_options[host].append(option)
+        return invalid_options
+
+    def _validate_tuned_profile_spec(self, spec: TunedProfileSpec) -> None:
+        if not spec.settings:
+            raise OrchestratorError("Invalid spec: settings section cannot be empty.")
+        self._validate_one_shot_placement_spec(spec.placement)
+        invalid_options = self._validate_tunedprofile_settings(spec)
+        if any(invalid_options.values()):
+            raise OrchestratorError(
+                f'Failed to apply tuned profile. Invalid sysctl option(s) for host(s) detected: {invalid_options}')
+
     @handle_orch_error
     def apply_tuned_profiles(self, specs: List[TunedProfileSpec], no_overwrite: bool = False) -> str:
         outs = []
         for spec in specs:
+            self._validate_tuned_profile_spec(spec)
             if no_overwrite and self.tuned_profiles.exists(spec.profile_name):
-                outs.append(f"Tuned profile '{spec.profile_name}' already exists (--no-overwrite was passed)")
+                outs.append(
+                    f"Tuned profile '{spec.profile_name}' already exists (--no-overwrite was passed)")
             else:
+                # validation passed, save the spec
                 self.tuned_profiles.add_profile(spec)
                 outs.append(f'Saved tuned profile {spec.profile_name}')
         self._kick_serve_loop()
diff --git a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py
index e2cd62aa7cb50c..9a7c7e40e8e6cb 100644
--- a/src/pybind/mgr/cephadm/services/cephadmservice.py
+++ b/src/pybind/mgr/cephadm/services/cephadmservice.py
@@ -1077,7 +1077,8 @@ def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[st
                'host': daemon_spec.host,
                'device_enhanced_scan': str(self.mgr.device_enhanced_scan)}
 
-        listener_cert, listener_key = agent.ssl_certs.generate_cert(self.mgr.inventory.get_addr(daemon_spec.host))
+        listener_cert, listener_key = agent.ssl_certs.generate_cert(
+            self.mgr.inventory.get_addr(daemon_spec.host))
         config = {
             'agent.json': json.dumps(cfg),
             'keyring': daemon_spec.keyring,
diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py
index 354ee338a5cfb7..c50a5fde46db54 100644
--- a/src/pybind/mgr/cephadm/tests/test_cephadm.py
+++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py
@@ -1938,3 +1938,119 @@ def test_host_rm_last_admin(self, cephadm_module: CephadmOrchestrator):
         with with_host(cephadm_module, 'test1', refresh_hosts=False, rm_with_force=True):
             with with_host(cephadm_module, 'test2', refresh_hosts=False, rm_with_force=False):
                 cephadm_module.inventory.add_label('test2', '_admin')
+
+    @pytest.mark.parametrize("facts, settings, expected_value",
+                             [
+                                 # All options are available on all hosts
+                                 (
+                                     {
+                                         "host1": {"sysctl_options": {'opt1': 'val1', 'opt2': 'val2'}},
+                                         "host2": {"sysctl_options": {'opt1': '', 'opt2': ''}},
+                                     },
+                                     {'opt1', 'opt2'},  # settings
+                                     {'host1': [], 'host2': []},  # expected_value
+                                 ),
+                                 # opt1 is missing on host1, opt2 is missing on host2
+                                 (
+                                     {
+                                         "host1": {"sysctl_options": {'opt2': '', 'optX': ''}},
+                                         "host2": {"sysctl_options": {'opt1': '', 'opt3': '', 'opt4': ''}},
+                                     },
+                                     {'opt1', 'opt2'},  # settings
+                                     {'host1': ['opt1'], 'host2': ['opt2']},  # expected_value
+                                 ),
+                                 # All options are missing on all hosts
+                                 (
+                                     {
+                                         "host1": {"sysctl_options": {}},
+                                         "host2": {"sysctl_options": {}},
+                                     },
+                                     {'opt1', 'opt2'},  # settings
+                                     {'host1': ['opt1', 'opt2'], 'host2': ['opt1', 'opt2']},  # expected_value
+                                 ),
+                             ])
+    @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+    def test_tuned_profiles_settings_validation(self, facts, settings, expected_value, cephadm_module):
+        with with_host(cephadm_module, 'test'):
+            spec = mock.Mock()
+            spec.settings = sorted(settings)
+            spec.placement.filter_matching_hostspecs = mock.Mock()
+            spec.placement.filter_matching_hostspecs.return_value = ['host1', 'host2']
+            cephadm_module.cache.facts = facts
+            assert cephadm_module._validate_tunedprofile_settings(spec) == expected_value
+
+    @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+    def test_tuned_profiles_validation(self, cephadm_module):
+        with with_host(cephadm_module, 'test'):
+
+            with pytest.raises(OrchestratorError, match="^Invalid placement specification.+"):
+                spec = mock.Mock()
+                spec.settings = {'a': 'b'}
+                spec.placement = PlacementSpec(hosts=[])
+                cephadm_module._validate_tuned_profile_spec(spec)
+
+            with pytest.raises(OrchestratorError, match="Invalid spec: settings section cannot be empty."):
+                spec = mock.Mock()
+                spec.settings = {}
+                spec.placement = PlacementSpec(hosts=['host1', 'host2'])
+                cephadm_module._validate_tuned_profile_spec(spec)
+
+            with pytest.raises(OrchestratorError, match="^Placement 'count' field is not supported .+"):
+                spec = mock.Mock()
+                spec.settings = {'a': 'b'}
+                spec.placement = PlacementSpec(count=1)
+                cephadm_module._validate_tuned_profile_spec(spec)
+
+            with pytest.raises(OrchestratorError, match="^Placement 'count_per_host' field is not supported .+"):
+                spec = mock.Mock()
+                spec.settings = {'a': 'b'}
+                spec.placement = PlacementSpec(count_per_host=1, label='foo')
+                cephadm_module._validate_tuned_profile_spec(spec)
+
+            with pytest.raises(OrchestratorError, match="^Found invalid host"):
+                spec = mock.Mock()
+                spec.settings = {'a': 'b'}
+                spec.placement = PlacementSpec(hosts=['host1', 'host2'])
+                cephadm_module.inventory = mock.Mock()
+                cephadm_module.inventory.all_specs = mock.Mock(
+                    return_value=[mock.Mock().hostname, mock.Mock().hostname])
+                cephadm_module._validate_tuned_profile_spec(spec)
diff --git a/src/python-common/ceph/deployment/hostspec.py b/src/python-common/ceph/deployment/hostspec.py
index cb7e4de3484248..29e53c9d36171e 100644
--- a/src/python-common/ceph/deployment/hostspec.py
+++ b/src/python-common/ceph/deployment/hostspec.py
@@ -128,8 +128,12 @@ def __str__(self) -> str:
         return self.hostname
 
     def __eq__(self, other: Any) -> bool:
+        if not isinstance(other, HostSpec):
+            return NotImplemented
         # Let's omit `status` for the moment, as it is still the very same host.
         return self.hostname == other.hostname and \
-               self.addr == other.addr and \
-               sorted(self.labels) == sorted(other.labels) and \
-               self.location == other.location
+            self.addr == other.addr and \
+            sorted(self.labels) == sorted(other.labels) and \
+            self.location == other.location
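
Note: for reference, a tuned profile spec exercising the new validation might look
like the following (the profile name, hostnames, and sysctl settings here are
illustrative placeholders, not part of this patch):

    profile_name: example-profile
    placement:
      hosts:
        - host1
        - host2
    settings:
      fs.file-max: '1000000'
      vm.swappiness: '10'

Such a spec is applied with 'ceph orch tuned-profile apply -i <spec-file>'. With
this patch, the spec is rejected up front when the settings section is empty, the
placement uses count/count_per_host, it names hosts missing from the inventory, no
(non-draining) host matches the placement, or a listed sysctl option is absent from
the 'sysctl_options' facts gathered on a matched host.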