Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cephadm: add --container-cli-args parameter #42671

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
13 changes: 10 additions & 3 deletions doc/man/8/cephadm.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Synopsis
| **cephadm**** [-h] [--image IMAGE] [--docker] [--data-dir DATA_DIR]
| [--log-dir LOG_DIR] [--logrotate-dir LOGROTATE_DIR]
| [--unit-dir UNIT_DIR] [--verbose] [--timeout TIMEOUT]
| [--retry RETRY] [--no-container-init]
| [--retry RETRY] [--no-container-init] [--container-cli-args]
| {version,pull,inspect-image,ls,list-networks,adopt,rm-daemon,rm-cluster,run,shell,enter,ceph-volume,unit,logs,bootstrap,deploy,check-host,prepare-host,add-repo,rm-repo,install}
| ...

Expand Down Expand Up @@ -84,7 +84,7 @@ Synopsis
| **cephadm** **deploy** [-h] --name NAME --fsid FSID [--config CONFIG]
| [--config-json CONFIG_JSON] [--keyring KEYRING]
| [--key KEY] [--osd-fsid OSD_FSID] [--skip-firewalld]
| [--tcp-ports TCP_PORTS] [--reconfig] [--allow-ptrace]
| [--tcp-ports TCP_PORTS] [--reconfig]

| **cephadm** **check-host** [-h] [--expect-hostname EXPECT_HOSTNAME]

Expand Down Expand Up @@ -160,6 +160,14 @@ Options

do not run podman/docker with `--init` (default: False)

.. option:: --container-cli-args

pass any additional flags to podman/docker cli (default: None)
guits marked this conversation as resolved.
Show resolved Hide resolved

For example, to debug Ceph daemons at runtime with gdb or strace, you must
enable the SYS_PTRACE capability on ceph's containers, and you can do this
with `--container-cli-args="--cap-add=PTRACE"`


Commands
========
Expand Down Expand Up @@ -292,7 +300,6 @@ Arguments:
* [--skip-firewalld] Do not configure firewalld
* [--tcp-ports List of tcp ports to open in the host firewall
* [--reconfig] Reconfigure a previously deployed daemon
* [--allow-ptrace] Allow SYS_PTRACE on daemon container


enter
Expand Down
2 changes: 1 addition & 1 deletion qa/tasks/cephadm.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,7 +504,7 @@ def ceph_bootstrap(ctx, config):
# set options
if config.get('allow_ptrace', True):
_shell(ctx, cluster_name, bootstrap_remote,
['ceph', 'config', 'set', 'mgr', 'mgr/cephadm/allow_ptrace', 'true'])
['ceph', 'config', 'set', 'mgr', 'mgr/cephadm/container_cli_args', '--value="--cap-add=PTRACE"'])

if not config.get('avoid_pacific_features', False):
log.info('Distributing conf and client.admin keyring to all hosts + 0755')
Expand Down
39 changes: 23 additions & 16 deletions src/cephadm/cephadm
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ class BaseConfig:

def __init__(self) -> None:
self.image: str = ''
self.container_cli_args: Optional[str] = None
self.docker: bool = False
self.data_dir: str = DATA_DIR
self.log_dir: str = LOG_DIR
Expand Down Expand Up @@ -2688,7 +2689,6 @@ def get_ceph_volume_container(ctx: CephadmContext,
def get_container(ctx: CephadmContext,
fsid: str, daemon_type: str, daemon_id: Union[int, str],
privileged: bool = False,
ptrace: bool = False,
container_args: Optional[List[str]] = None) -> 'CephContainer':
entrypoint: str = ''
name: str = ''
Expand Down Expand Up @@ -2777,6 +2777,9 @@ def get_container(ctx: CephadmContext,
if ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION:
container_args.append('--cgroups=split')

if ctx.container_cli_args is not None:
container_args.extend(shlex.split(ctx.container_cli_args))

return CephContainer.for_daemon(
ctx,
fsid=fsid,
Expand All @@ -2789,7 +2792,6 @@ def get_container(ctx: CephadmContext,
bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
envs=envs,
privileged=privileged,
ptrace=ptrace,
host_network=host_network,
)

Expand Down Expand Up @@ -3434,12 +3436,12 @@ class CephContainer:
container_args: List[str] = [],
envs: Optional[List[str]] = None,
privileged: bool = False,
ptrace: bool = False,
bind_mounts: Optional[List[List[str]]] = None,
init: Optional[bool] = None,
host_network: bool = True,
memory_request: Optional[str] = None,
memory_limit: Optional[str] = None,
container_cli_args: Optional[List[str]] = None,
) -> None:
self.ctx = ctx
self.image = image
Expand All @@ -3450,12 +3452,12 @@ class CephContainer:
self.container_args = container_args
self.envs = envs
self.privileged = privileged
self.ptrace = ptrace
self.bind_mounts = bind_mounts if bind_mounts else []
self.init = init if init else ctx.container_init
self.host_network = host_network
self.memory_request = memory_request
self.memory_limit = memory_limit
self.container_cli_args = container_cli_args if container_cli_args else ctx.container_cli_args

@classmethod
def for_daemon(cls,
Expand All @@ -3469,7 +3471,6 @@ class CephContainer:
container_args: List[str] = [],
envs: Optional[List[str]] = None,
privileged: bool = False,
ptrace: bool = False,
bind_mounts: Optional[List[List[str]]] = None,
init: Optional[bool] = None,
host_network: bool = True,
Expand All @@ -3486,7 +3487,6 @@ class CephContainer:
container_args=container_args,
envs=envs,
privileged=privileged,
ptrace=ptrace,
bind_mounts=bind_mounts,
init=init,
host_network=host_network,
Expand Down Expand Up @@ -3536,6 +3536,9 @@ class CephContainer:
'--stop-signal=SIGTERM',
]

if self.container_cli_args is not None:
cmd_args.extend(shlex.split(self.container_cli_args))

if isinstance(self.ctx.container_engine, Podman):
if os.path.exists('/etc/ceph/podman-auth.json'):
cmd_args.append('--authfile=/etc/ceph/podman-auth.json')
Expand All @@ -3562,11 +3565,11 @@ class CephContainer:
'--privileged',
# let OSD etc read block devs that haven't been chowned
'--group-add=disk'])
if self.ptrace and not self.privileged:
# if privileged, the SYS_PTRACE cap is already added
# in addition, --cap-add and --privileged are mutually
# exclusive since podman >= 2.0
cmd_args.append('--cap-add=SYS_PTRACE')
guits marked this conversation as resolved.
Show resolved Hide resolved
if '--cap-add=PTRACE' in self.container_cli_args:
# if privileged, the SYS_PTRACE cap is already added
# in addition, --cap-add and --privileged are mutually
# exclusive since podman >= 2.0
self.container_cli_args.remove('--cap-add=PTRACE')
if self.init:
cmd_args.append('--init')
envs += ['-e', 'CEPH_USE_RANDOM_NONCE=1']
Expand Down Expand Up @@ -5120,6 +5123,9 @@ def command_bootstrap(ctx):

enable_cephadm_mgr_module(cli, wait_for_mgr_restart)

if ctx.container_cli_args is not None:
cli(['config', 'set', 'mgr', 'mgr/cephadm/container_cli_args', '--value={}'.format(ctx.container_cli_args), '--force'])

# ssh
if not ctx.skip_ssh:
prepare_ssh(ctx, cli, wait_for_mgr_restart)
Expand Down Expand Up @@ -5294,8 +5300,7 @@ def command_deploy(ctx):
uid, gid = extract_uid_gid(ctx)
make_var_run(ctx, ctx.fsid, uid, gid)

c = get_container_with_extra_args(ctx, ctx.fsid, daemon_type, daemon_id,
ptrace=ctx.allow_ptrace)
c = get_container_with_extra_args(ctx, ctx.fsid, daemon_type, daemon_id)
deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
config=config, keyring=keyring,
osd_fsid=ctx.osd_fsid,
Expand Down Expand Up @@ -5367,8 +5372,7 @@ def command_deploy(ctx):
if not ctx.reconfig and not redeploy:
daemon_ports.extend(cc.ports)
c = get_container_with_extra_args(ctx, ctx.fsid, daemon_type, daemon_id,
privileged=cc.privileged,
ptrace=ctx.allow_ptrace)
privileged=cc.privileged)
deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c,
uid=cc.uid, gid=cc.gid, config=None,
keyring=None, reconfig=ctx.reconfig,
Expand Down Expand Up @@ -7928,6 +7932,9 @@ def _get_parser():
'--image',
help='container image. Can also be set via the "CEPHADM_IMAGE" '
'env var')
parser.add_argument(
'--container-cli-args',
help='Run the container cli with extra args')
parser.add_argument(
'--docker',
action='store_true',
Expand Down Expand Up @@ -8437,7 +8444,7 @@ def _get_parser():
parser_deploy.add_argument(
'--allow-ptrace',
action='store_true',
help='Allow SYS_PTRACE on daemon container')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This removes an option (--allow-ptrace) without deprecating it. If we cherry-pick this to pacific, it will destabilize users who might have relied on this.

What if you changed this to help=argparse.SUPPRESS and printed a warning to the user instead?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ktdreyer do you suggest I should drop this commit entirely?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's ok to drop it in Ceph's Quincy release.

If you're cherry-picking this change to the pacific branch, it's a good idea to not cherry-pick this removal to that stable branch. For example, I think Red Hat has existing documentation that refers to --allow-ptrace right?

help=argparse.SUPPRESS)
parser_deploy.add_argument(
'--container-init',
action='store_true',
Expand Down
18 changes: 7 additions & 11 deletions src/pybind/mgr/cephadm/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,12 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
default='root',
desc='mode for remote execution of cephadm',
),
Option(
'container_cli_args',
type='str',
default=None,
desc='Run the container cli with extra args',
),
Option(
'container_image_base',
default=DEFAULT_IMAGE,
Expand Down Expand Up @@ -238,16 +244,6 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
default=True,
desc='log to the "cephadm" cluster log channel"',
),
Option(
'allow_ptrace',
type='bool',
default=False,
desc='allow SYS_PTRACE capability on ceph containers',
long_desc='The SYS_PTRACE capability is needed to attach to a '
'process with gdb or strace. Enabling this options '
'can allow debugging daemons that encounter problems '
'at runtime.',
),
Option(
'container_init',
type='bool',
Expand Down Expand Up @@ -405,6 +401,7 @@ def __init__(self, *args: Any, **kwargs: Any):
self.host_check_interval = 0
self.max_count_per_host = 0
self.mode = ''
self.container_cli_args = None
self.container_image_base = ''
self.container_image_prometheus = ''
self.container_image_grafana = ''
Expand All @@ -416,7 +413,6 @@ def __init__(self, *args: Any, **kwargs: Any):
self.warn_on_stray_hosts = True
self.warn_on_stray_daemons = True
self.warn_on_failed_host_check = True
self.allow_ptrace = False
self.container_init = True
self.prometheus_alerts_path = ''
self.migration_current: Optional[int] = None
Expand Down
4 changes: 2 additions & 2 deletions src/pybind/mgr/cephadm/serve.py
Original file line number Diff line number Diff line change
Expand Up @@ -1102,8 +1102,6 @@ async def _create_daemon(self,

if reconfig:
daemon_spec.extra_args.append('--reconfig')
if self.mgr.allow_ptrace:
daemon_spec.extra_args.append('--allow-ptrace')

try:
eca = daemon_spec.extra_container_args
Expand Down Expand Up @@ -1282,6 +1280,8 @@ async def _run_cephadm(self,

# subcommand
final_args.append(command)
if self.mgr.container_cli_args is not None:
final_args.append('--container-cli-args="{}"'.format(self.mgr.container_cli_args))

# subcommand args
if not no_fsid:
Expand Down