Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[stable-2.14] ansible-test - Improve container startup handling. #79609

Merged
1 commit merged on Dec 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions changelogs/fragments/ansible-test-container-management.yml
Expand Up @@ -56,6 +56,8 @@ bugfixes:
- ansible-test - Detection for running in a Podman or Docker container has been fixed to detect more scenarios.
The new detection relies on ``/proc/self/mountinfo`` instead of ``/proc/self/cpuset``.
Detection now works with custom cgroups and private cgroup namespaces.
- ansible-test - Avoid using ``exec`` after container startup when possible.
This improves container startup performance and avoids intermittent startup issues with some old containers.
known_issues:
- ansible-test - Using Docker on systems with SELinux may require setting SELinux to permissive mode.
Podman should work with SELinux in enforcing mode.
Expand Down
50 changes: 47 additions & 3 deletions test/integration/targets/ansible-test-container/runme.py
Expand Up @@ -149,10 +149,29 @@ def get_test_scenarios() -> list[TestScenario]:
image = settings['image']
cgroup = settings.get('cgroup', 'v1-v2')

if container_name == 'centos6' and os_release.id == 'alpine':
# Alpine kernels do not emulate vsyscall by default, which causes the centos6 container to fail during init.
# See: https://unix.stackexchange.com/questions/478387/running-a-centos-docker-image-on-arch-linux-exits-with-code-139
# Other distributions enable settings which trap vsyscall by default.
# See: https://www.kernelconfig.io/config_legacy_vsyscall_xonly
# See: https://www.kernelconfig.io/config_legacy_vsyscall_emulate
continue

for engine in available_engines:
# TODO: figure out how to get tests passing using docker without disabling selinux
disable_selinux = os_release.id == 'fedora' and engine == 'docker' and cgroup != 'none'
expose_cgroup_v1 = cgroup == 'v1-only' and get_docker_info(engine).cgroup_version != 1
debug_systemd = cgroup != 'none'

# The sleep+pkill used to support the cgroup probe causes problems with the centos6 container.
# It results in sshd connections being refused or reset for many, but not all, container instances.
# The underlying cause of this issue is unknown.
probe_cgroups = container_name != 'centos6'

# The default RHEL 9 crypto policy prevents use of SHA-1.
# This results in SSH errors with centos6 containers: ssh_dispatch_run_fatal: Connection to 1.2.3.4 port 22: error in libcrypto
# See: https://access.redhat.com/solutions/6816771
enable_sha1 = os_release.id == 'rhel' and os_release.version_id.startswith('9.') and container_name == 'centos6'

if cgroup != 'none' and get_docker_info(engine).cgroup_version == 1 and not have_cgroup_systemd():
expose_cgroup_v1 = True # the host uses cgroup v1 but there is no systemd cgroup and the container requires cgroup support
Expand Down Expand Up @@ -182,6 +201,9 @@ def get_test_scenarios() -> list[TestScenario]:
image=image,
disable_selinux=disable_selinux,
expose_cgroup_v1=expose_cgroup_v1,
enable_sha1=enable_sha1,
debug_systemd=debug_systemd,
probe_cgroups=probe_cgroups,
)
)

Expand All @@ -195,11 +217,21 @@ def run_test(scenario: TestScenario) -> TestResult:
start = time.monotonic()

integration = ['ansible-test', 'integration', 'split']
integration_options = ['--target', f'docker:{scenario.container_name}', '--color', '--truncate', '0', '-v', '--dev-probe-cgroups', str(LOG_PATH),
'--dev-systemd-debug']
integration_options = ['--target', f'docker:{scenario.container_name}', '--color', '--truncate', '0', '-v']
target_only_options = []

if scenario.debug_systemd:
integration_options.append('--dev-systemd-debug')

if scenario.probe_cgroups:
target_only_options = ['--dev-probe-cgroups', str(LOG_PATH)]

commands = [
[*integration, *integration_options],
# The cgroup probe is only performed for the first test of the target.
# There's no need to repeat the probe for the same target.
# The controller will be tested separately as a target.
# This ensures that both the probe and no-probe code paths are functional.
[*integration, *integration_options, *target_only_options],
# For the split test we'll use alpine3 as the controller. There are two reasons for this:
# 1) It doesn't require the cgroup v1 hack, so we can test a target that doesn't need that.
# 2) It doesn't require disabling selinux, so we can test a target that doesn't need that.
Expand Down Expand Up @@ -260,12 +292,18 @@ def run_test(scenario: TestScenario) -> TestResult:
if scenario.disable_selinux:
run_command('setenforce', 'permissive')

if scenario.enable_sha1:
run_command('update-crypto-policies', '--set', 'DEFAULT:SHA1')

for test_command in test_commands:
retry_command(lambda: run_command(*test_command))
except SubprocessError as ex:
message = str(ex)
display.error(f'{scenario} {message}')
finally:
if scenario.enable_sha1:
run_command('update-crypto-policies', '--set', 'DEFAULT')

if scenario.disable_selinux:
run_command('setenforce', 'enforcing')

Expand Down Expand Up @@ -519,6 +557,9 @@ class TestScenario:
image: str
disable_selinux: bool
expose_cgroup_v1: bool
enable_sha1: bool
debug_systemd: bool
probe_cgroups: bool

@property
def tags(self) -> tuple[str, ...]:
Expand All @@ -536,6 +577,9 @@ def tags(self) -> tuple[str, ...]:
if self.expose_cgroup_v1:
tags.append('cgroup: v1')

if self.enable_sha1:
tags.append('sha1: enabled')

return tuple(tags)

@property
Expand Down
33 changes: 27 additions & 6 deletions test/lib/ansible_test/_internal/host_profiles.py
Expand Up @@ -411,6 +411,7 @@ class InitConfig:
"""Configuration details required to run the container init."""
options: list[str]
command: str
command_privileged: bool
expected_mounts: tuple[CGroupMount, ...]

@property
Expand Down Expand Up @@ -452,12 +453,12 @@ def provision(self) -> None:
publish_ports=not self.controller, # connections to the controller over SSH are not required
options=init_config.options,
cleanup=CleanupMode.NO,
cmd=self.build_sleep_command() if init_config.command or init_probe else None,
cmd=self.build_init_command(init_config, init_probe),
)

if not container:
if self.args.prime_containers:
if init_config.command or init_probe:
if init_config.command_privileged or init_probe:
docker_pull(self.args, UTILITY_IMAGE)

return
Expand All @@ -467,7 +468,7 @@ def provision(self) -> None:
try:
options = ['--pid', 'host', '--privileged']

if init_config.command:
if init_config.command and init_config.command_privileged:
init_command = init_config.command

if not init_probe:
Expand Down Expand Up @@ -500,6 +501,7 @@ def get_podman_init_config(self) -> InitConfig:
"""Return init config for running under Podman."""
options = self.get_common_run_options()
command: t.Optional[str] = None
command_privileged = False
expected_mounts: tuple[CGroupMount, ...]

cgroup_version = get_docker_info(self.args).cgroup_version
Expand Down Expand Up @@ -651,13 +653,15 @@ def get_podman_init_config(self) -> InitConfig:
return self.InitConfig(
options=options,
command=command,
command_privileged=command_privileged,
expected_mounts=expected_mounts,
)

def get_docker_init_config(self) -> InitConfig:
"""Return init config for running under Docker."""
options = self.get_common_run_options()
command: t.Optional[str] = None
command_privileged = False
expected_mounts: tuple[CGroupMount, ...]

cgroup_version = get_docker_info(self.args).cgroup_version
Expand Down Expand Up @@ -724,7 +728,9 @@ def get_docker_init_config(self) -> InitConfig:
elif self.config.cgroup in (CGroupVersion.V1_V2, CGroupVersion.V2_ONLY) and cgroup_version == 2:
# Docker hosts providing cgroup v2 will give each container a read-only cgroup mount.
# It must be remounted read-write before systemd starts.
# This must be done in a privileged container, otherwise a "permission denied" error can occur.
command = 'mount -o remount,rw /sys/fs/cgroup/'
command_privileged = True

options.extend((
# A private cgroup namespace is used to avoid exposing the host cgroup to the container.
Expand Down Expand Up @@ -768,12 +774,14 @@ def get_docker_init_config(self) -> InitConfig:
return self.InitConfig(
options=options,
command=command,
command_privileged=command_privileged,
expected_mounts=expected_mounts,
)

def build_sleep_command(self) -> list[str]:
def build_init_command(self, init_config: InitConfig, sleep: bool) -> t.Optional[list[str]]:
"""
Build and return the command to put the container to sleep.
Build and return the command to start in the container.
Returns None if the default command for the container should be used.

The sleep duration below was selected to:

Expand All @@ -783,10 +791,23 @@ def build_sleep_command(self) -> list[str]:

NOTE: The container must have a POSIX-compliant default shell "sh" with a non-builtin "sleep" command.
"""
command = ''

if init_config.command and not init_config.command_privileged:
command += f'{init_config.command} && '

if sleep or init_config.command_privileged:
command += 'sleep 60 ; '

if not command:
return None

docker_pull(self.args, self.config.image)
inspect = docker_image_inspect(self.args, self.config.image)

return ['sh', '-c', f'sleep 60; exec {shlex.join(inspect.cmd)}']
command += f'exec {shlex.join(inspect.cmd)}'

return ['sh', '-c', command]

@property
def wake_command(self) -> list[str]:
Expand Down