Skip to content

Commit

Permalink
Merge pull request #56497 from adk3798/wip-65124-squid
Browse files Browse the repository at this point in the history
squid: mgr/cephadm: add some NVMEof gateway fields to be configurable by cephadm

Reviewed-by: Ernesto Puerta <epuertat@redhat.com>
Reviewed-by: John Mulligan <jmulligan@redhat.com>
  • Loading branch information
adk3798 committed Apr 29, 2024
2 parents f97a34d + a8822f7 commit 6a0c153
Show file tree
Hide file tree
Showing 4 changed files with 212 additions and 26 deletions.
5 changes: 3 additions & 2 deletions src/pybind/mgr/cephadm/services/nvmeof.py
Expand Up @@ -43,8 +43,9 @@ def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonD
'name': name,
'addr': host_ip,
'port': spec.port,
'log_level': 'WARN',
'rpc_socket': '/var/tmp/spdk.sock',
'spdk_log_level': 'WARNING',
'rpc_socket_dir': '/var/tmp/',
'rpc_socket_name': 'spdk.sock',
'transport_tcp_options': transport_tcp_options,
'rados_id': rados_id
}
Expand Down
Expand Up @@ -3,16 +3,37 @@
name = {{ name }}
group = {{ spec.group if spec.group is not none else '' }}
addr = {{ addr }}
port = {{ port }}
port = {{ spec.port }}
enable_auth = {{ spec.enable_auth }}
state_update_notify = True
state_update_interval_sec = 5
min_controller_id = {{ spec.min_controller_id }}
max_controller_id = {{ spec.max_controller_id }}
state_update_notify = {{ spec.state_update_notify }}
state_update_interval_sec = {{ spec.state_update_interval_sec }}
enable_spdk_discovery_controller = {{ spec.enable_spdk_discovery_controller }}
enable_prometheus_exporter = True
enable_prometheus_exporter = {{ spec.enable_prometheus_exporter }}
prometheus_exporter_ssl = False
prometheus_port = 10008
verify_nqns = {{ spec.verify_nqns }}
omap_file_lock_duration = {{ spec.omap_file_lock_duration }}
omap_file_lock_retries = {{ spec.omap_file_lock_retries }}
omap_file_lock_retry_sleep_interval = {{ spec.omap_file_lock_retry_sleep_interval }}
omap_file_update_reloads = {{ spec.omap_file_update_reloads }}
allowed_consecutive_spdk_ping_failures = {{ spec.allowed_consecutive_spdk_ping_failures }}
spdk_ping_interval_in_seconds = {{ spec.spdk_ping_interval_in_seconds }}
ping_spdk_under_lock = {{ spec.ping_spdk_under_lock }}
enable_monitor_client = {{ spec.enable_monitor_client }}

[gateway-logs]
log_level = {{ spec.log_level }}
log_files_enabled = {{ spec.log_files_enabled }}
log_files_rotation_enabled = {{ spec.log_files_rotation_enabled }}
verbose_log_messages = {{ spec.verbose_log_messages }}
max_log_file_size_in_mb = {{ spec.max_log_file_size_in_mb }}
max_log_files_count = {{ spec.max_log_files_count }}
max_log_directory_backups = {{ spec.max_log_directory_backups }}
log_directory = {{ spec.log_directory }}

[discovery]
addr = {{ addr }}
port = {{ spec.discovery_port }}

[ceph]
pool = {{ spec.pool }}
Expand All @@ -27,9 +48,11 @@ client_cert = {{ spec.client_cert }}

[spdk]
tgt_path = {{ spec.tgt_path }}
rpc_socket = {{ rpc_socket }}
timeout = {{ spec.timeout }}
log_level = {{ log_level }}
rpc_socket_dir = {{ spec.rpc_socket_dir }}
rpc_socket_name = {{ spec.rpc_socket_name }}
timeout = {{ spec.spdk_timeout }}
bdevs_per_cluster = {{ spec.bdevs_per_cluster }}
log_level = {{ spec.spdk_log_level }}
conn_retries = {{ spec.conn_retries }}
transports = {{ spec.transports }}
{% if transport_tcp_options %}
Expand All @@ -38,3 +61,7 @@ transport_tcp_options = {{ transport_tcp_options }}
{% if spec.tgt_cmd_extra_args %}
tgt_cmd_extra_args = {{ spec.tgt_cmd_extra_args }}
{% endif %}

[monitor]
timeout = {{ spec.monitor_timeout }}

38 changes: 32 additions & 6 deletions src/pybind/mgr/cephadm/tests/test_services.py
Expand Up @@ -391,12 +391,33 @@ def test_nvmeof_config(self, _get_name, _run_cephadm, cephadm_module: CephadmOrc
enable_auth = False
state_update_notify = True
state_update_interval_sec = 5
min_controller_id = 1
max_controller_id = 65519
enable_spdk_discovery_controller = False
enable_prometheus_exporter = True
prometheus_exporter_ssl = False
prometheus_port = 10008
verify_nqns = True
omap_file_lock_duration = 20
omap_file_lock_retries = 30
omap_file_lock_retry_sleep_interval = 1.0
omap_file_update_reloads = 10
allowed_consecutive_spdk_ping_failures = 1
spdk_ping_interval_in_seconds = 2.0
ping_spdk_under_lock = False
enable_monitor_client = False
[gateway-logs]
log_level = INFO
log_files_enabled = True
log_files_rotation_enabled = True
verbose_log_messages = True
max_log_file_size_in_mb = 10
max_log_files_count = 20
max_log_directory_backups = 10
log_directory = /var/log/ceph/
[discovery]
addr = 192.168.100.100
port = 8009
[ceph]
pool = {pool}
Expand All @@ -411,13 +432,18 @@ def test_nvmeof_config(self, _get_name, _run_cephadm, cephadm_module: CephadmOrc
[spdk]
tgt_path = /usr/local/bin/nvmf_tgt
rpc_socket = /var/tmp/spdk.sock
timeout = 60
log_level = WARN
rpc_socket_dir = /var/tmp/
rpc_socket_name = spdk.sock
timeout = 60.0
bdevs_per_cluster = 32
log_level = WARNING
conn_retries = 10
transports = tcp
transport_tcp_options = {{"in_capsule_data_size": 8192, "max_io_qpairs_per_ctrlr": 7}}
tgt_cmd_extra_args = {tgt_cmd_extra_args}\n"""
tgt_cmd_extra_args = {tgt_cmd_extra_args}
[monitor]
timeout = 1.0\n"""

with with_host(cephadm_module, 'test'):
with with_service(cephadm_module, NvmeofServiceSpec(service_id=pool,
Expand Down
150 changes: 141 additions & 9 deletions src/python-common/ceph/deployment/service_spec.py
Expand Up @@ -1312,21 +1312,45 @@ def __init__(self,
port: Optional[int] = None,
pool: Optional[str] = None,
enable_auth: bool = False,
min_controller_id: Optional[str] = '1',
max_controller_id: Optional[str] = '65519',
state_update_notify: Optional[bool] = True,
state_update_interval_sec: Optional[int] = 5,
enable_spdk_discovery_controller: Optional[bool] = False,
omap_file_lock_duration: Optional[int] = 20,
omap_file_lock_retries: Optional[int] = 30,
omap_file_lock_retry_sleep_interval: Optional[float] = 1.0,
omap_file_update_reloads: Optional[int] = 10,
enable_prometheus_exporter: Optional[bool] = True,
bdevs_per_cluster: Optional[int] = 32,
verify_nqns: Optional[bool] = True,
allowed_consecutive_spdk_ping_failures: Optional[int] = 1,
spdk_ping_interval_in_seconds: Optional[float] = 2.0,
ping_spdk_under_lock: Optional[bool] = False,
server_key: Optional[str] = None,
server_cert: Optional[str] = None,
client_key: Optional[str] = None,
client_cert: Optional[str] = None,
spdk_path: Optional[str] = None,
tgt_path: Optional[str] = None,
timeout: Optional[int] = 60,
spdk_timeout: Optional[float] = 60.0,
spdk_log_level: Optional[str] = 'WARNING',
rpc_socket_dir: Optional[str] = '/var/tmp/',
rpc_socket_name: Optional[str] = 'spdk.sock',
conn_retries: Optional[int] = 10,
transports: Optional[str] = 'tcp',
transport_tcp_options: Optional[Dict[str, int]] =
{"in_capsule_data_size": 8192, "max_io_qpairs_per_ctrlr": 7},
tgt_cmd_extra_args: Optional[str] = None,
discovery_port: Optional[int] = None,
log_level: Optional[str] = 'INFO',
log_files_enabled: Optional[bool] = True,
log_files_rotation_enabled: Optional[bool] = True,
verbose_log_messages: Optional[bool] = True,
max_log_file_size_in_mb: Optional[int] = 10,
max_log_files_count: Optional[int] = 20,
max_log_directory_backups: Optional[int] = 10,
log_directory: Optional[str] = '/var/log/ceph/',
monitor_timeout: Optional[float] = 1.0,
enable_monitor_client: bool = False,
placement: Optional[PlacementSpec] = None,
unmanaged: bool = False,
preview_only: bool = False,
Expand Down Expand Up @@ -1355,12 +1379,32 @@ def __init__(self,
self.group = group
#: ``enable_auth`` enables user authentication on nvmeof gateway
self.enable_auth = enable_auth
#: ``min_controller_id`` minimum controller id used by SPDK, essential for multipath
self.min_controller_id = min_controller_id
#: ``max_controller_id`` maximum controller id used by SPDK, essential for multipath
self.max_controller_id = max_controller_id
#: ``state_update_notify`` enables automatic update from OMAP in nvmeof gateway
self.state_update_notify = state_update_notify
#: ``state_update_interval_sec`` number of seconds to check for updates in OMAP
self.state_update_interval_sec = state_update_interval_sec
#: ``enable_spdk_discovery_controller`` SPDK or ceph-nvmeof discovery service
self.enable_spdk_discovery_controller = enable_spdk_discovery_controller
#: ``enable_prometheus_exporter`` enables Prometheus exporter
self.enable_prometheus_exporter = enable_prometheus_exporter
#: ``verify_nqns`` enables verification of subsystem and host NQNs for validity
self.verify_nqns = verify_nqns
#: ``omap_file_lock_duration`` number of seconds before the OMAP file lock is automatically released
self.omap_file_lock_duration = omap_file_lock_duration
#: ``omap_file_lock_retries`` number of retries to lock OMAP file before giving up
self.omap_file_lock_retries = omap_file_lock_retries
#: ``omap_file_lock_retry_sleep_interval`` seconds to wait before retrying to lock OMAP
self.omap_file_lock_retry_sleep_interval = omap_file_lock_retry_sleep_interval
#: ``omap_file_update_reloads`` number of attempts to reload OMAP when it differs from local
self.omap_file_update_reloads = omap_file_update_reloads
#: ``allowed_consecutive_spdk_ping_failures`` # of ping failures before aborting gateway
self.allowed_consecutive_spdk_ping_failures = allowed_consecutive_spdk_ping_failures
#: ``spdk_ping_interval_in_seconds`` sleep interval in seconds between SPDK pings
self.spdk_ping_interval_in_seconds = spdk_ping_interval_in_seconds
#: ``ping_spdk_under_lock`` whether or not we should perform SPDK ping under the RPC lock
self.ping_spdk_under_lock = ping_spdk_under_lock
#: ``bdevs_per_cluster`` number of bdevs per cluster
self.bdevs_per_cluster = bdevs_per_cluster
#: ``server_key`` gateway server key
self.server_key = server_key or './server.key'
#: ``server_cert`` gateway server certificate
Expand All @@ -1373,8 +1417,14 @@ def __init__(self,
self.spdk_path = spdk_path or '/usr/local/bin/nvmf_tgt'
#: ``tgt_path`` nvmeof target path
self.tgt_path = tgt_path or '/usr/local/bin/nvmf_tgt'
#: ``timeout`` ceph connectivity timeout
self.timeout = timeout
#: ``spdk_timeout`` SPDK connectivity timeout
self.spdk_timeout = spdk_timeout
#: ``spdk_log_level`` the SPDK log level
self.spdk_log_level = spdk_log_level or 'WARNING'
#: ``rpc_socket_dir`` the SPDK socket file directory
self.rpc_socket_dir = rpc_socket_dir or '/var/tmp/'
#: ``rpc_socket_name`` the SPDK socket file name
self.rpc_socket_name = rpc_socket_name or 'spdk.sock'
#: ``conn_retries`` ceph connection retries number
self.conn_retries = conn_retries
#: ``transports`` tcp
Expand All @@ -1383,6 +1433,28 @@ def __init__(self,
self.transport_tcp_options: Optional[Dict[str, int]] = transport_tcp_options
#: ``tgt_cmd_extra_args`` extra arguments for the nvmf_tgt process
self.tgt_cmd_extra_args = tgt_cmd_extra_args
#: ``discovery_port`` port of the discovery service
self.discovery_port = discovery_port or 8009
#: ``log_level`` the nvmeof gateway log level
self.log_level = log_level or 'INFO'
#: ``log_files_enabled`` enables the usage of files to keep the nvmeof gateway log
self.log_files_enabled = log_files_enabled
#: ``log_files_rotation_enabled`` enables rotation of log files when they pass the size limit
self.log_files_rotation_enabled = log_files_rotation_enabled
#: ``verbose_log_messages`` add more details to the nvmeof gateway log message
self.verbose_log_messages = verbose_log_messages
#: ``max_log_file_size_in_mb`` max size in MB before starting a new log file
self.max_log_file_size_in_mb = max_log_file_size_in_mb
#: ``max_log_files_count`` max log files to keep before overriding them
self.max_log_files_count = max_log_files_count
#: ``max_log_directory_backups`` max directories for old gateways with same name to keep
self.max_log_directory_backups = max_log_directory_backups
#: ``log_directory`` directory for keeping nvmeof gateway log files
self.log_directory = log_directory or '/var/log/ceph/'
#: ``monitor_timeout`` monitor connectivity timeout
self.monitor_timeout = monitor_timeout
#: ``enable_monitor_client`` whether to connect to the ceph monitor or not
self.enable_monitor_client = enable_monitor_client

def get_port_start(self) -> List[int]:
    """Return the list of ports associated with this nvmeof service.

    The third entry is the discovery service port. It follows the
    configurable ``discovery_port`` attribute instead of a fixed literal,
    so a spec that overrides the discovery port reports the port that is
    actually written to the ``[discovery]`` section of the gateway config.
    When ``discovery_port`` is unset/falsy the previous value (8009) is
    returned, so the default result is unchanged.

    :return: ``[5500, 4420, <discovery port>]``
    """
    # NOTE(review): 5500 and 4420 are kept exactly as before; they look
    # like the gateway port default and the NVMe/TCP I/O port — confirm
    # against the callers before making them configurable as well.
    discovery_port = self.discovery_port or 8009
    return [5500, 4420, discovery_port]
Expand All @@ -1402,6 +1474,66 @@ def validate(self) -> None:
if self.transports not in ['tcp']:
raise SpecValidationError('Invalid transport. Valid values are tcp')

if self.log_level:
if self.log_level not in ['debug', 'DEBUG',
'info', 'INFO',
'warning', 'WARNING',
'error', 'ERROR',
'critical', 'CRITICAL']:
raise SpecValidationError(
'Invalid log level. Valid values are: debug, info, warning, error, critial')

if self.spdk_log_level:
if self.spdk_log_level not in ['debug', 'DEBUG',
'info', 'INFO',
'warning', 'WARNING',
'error', 'ERROR',
'notice', 'NOTICE']:
raise SpecValidationError(
'Invalid SPDK log level. Valid values are: DEBUG, INFO, WARNING, ERROR, NOTICE')

if self.spdk_ping_interval_in_seconds < 1.0:
raise SpecValidationError("SPDK ping interval should be at least 1 second")

if self.allowed_consecutive_spdk_ping_failures < 1:
raise SpecValidationError("Allowed consecutive SPDK ping failures should be at least 1")

if self.state_update_interval_sec < 0:
raise SpecValidationError("State update interval can't be negative")

if self.omap_file_lock_duration < 0:
raise SpecValidationError("OMAP file lock duration can't be negative")

if self.omap_file_lock_retries < 0:
raise SpecValidationError("OMAP file lock retries can't be negative")

if self.omap_file_update_reloads < 0:
raise SpecValidationError("OMAP file reloads can't be negative")

if self.spdk_timeout < 0.0:
raise SpecValidationError("SPDK timeout can't be negative")

if self.conn_retries < 0:
raise SpecValidationError("Connection retries can't be negative")

if self.max_log_file_size_in_mb < 0:
raise SpecValidationError("Log file size can't be negative")

if self.max_log_files_count < 0:
raise SpecValidationError("Log files count can't be negative")

if self.max_log_directory_backups < 0:
raise SpecValidationError("Log file directory backups can't be negative")

if self.monitor_timeout < 0.0:
raise SpecValidationError("Monitor timeout can't be negative")

if self.port and self.port < 0:
raise SpecValidationError("Port can't be negative")

if self.discovery_port and self.discovery_port < 0:
raise SpecValidationError("Discovery port can't be negative")


yaml.add_representer(NvmeofServiceSpec, ServiceSpec.yaml_representer)

Expand Down

0 comments on commit 6a0c153

Please sign in to comment.