Skip to content

Commit

Permalink
feat(cc_install_hotplug): trigger hook on known ec2 drivers (#4799)
Browse files Browse the repository at this point in the history
Add extra logic to only trigger hook-hotplug on NICs with known drivers
on EC2. This aviods the hook being triggered on any add/remove event on
net devices, causing uneeded CPU usage, as on instance that start and
stop a lot of docker containers, see [1].

Rename 10-cloud-init-hook-hotplug.rules to
90-cloud-init-hook-hotplug.rules as ID_NET_DRIVER is not defined until
[2]80-net-setup-link.rules is sourced.

References:

[1] https://bugs.launchpad.net/cloud-init/+bug/1946003
[2] https://github.com/systemd/systemd/blob/main/rules.d/80-net-setup-link.rules
  • Loading branch information
aciba90 authored and blackboxsw committed Feb 16, 2024
1 parent 0e9a019 commit a589842
Show file tree
Hide file tree
Showing 7 changed files with 104 additions and 9 deletions.
19 changes: 15 additions & 4 deletions cloudinit/config/cc_install_hotplug.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
This module will install the udev rules to enable hotplug if
supported by the datasource and enabled in the userdata. The udev
rules will be installed as
``/etc/udev/rules.d/10-cloud-init-hook-hotplug.rules``.
``/etc/udev/rules.d/90-cloud-init-hook-hotplug.rules``.
When hotplug is enabled, newly added network devices will be added
to the system by cloud-init. After udev detects the event,
Expand Down Expand Up @@ -59,10 +59,12 @@
LOG = logging.getLogger(__name__)


HOTPLUG_UDEV_PATH = "/etc/udev/rules.d/10-cloud-init-hook-hotplug.rules"
# 90 to be sorted after 80-net-setup-link.rules which sets ID_NET_DRIVER and
# some datasources match on drivers
HOTPLUG_UDEV_PATH = "/etc/udev/rules.d/90-cloud-init-hook-hotplug.rules"
HOTPLUG_UDEV_RULES_TEMPLATE = """\
# Installed by cloud-init due to network hotplug userdata
ACTION!="add|remove", GOTO="cloudinit_end"
ACTION!="add|remove", GOTO="cloudinit_end"{extra_rules}
LABEL="cloudinit_hook"
SUBSYSTEM=="net", RUN+="{libexecdir}/hook-hotplug"
LABEL="cloudinit_end"
Expand Down Expand Up @@ -104,12 +106,21 @@ def handle(name: str, cfg: Config, cloud: Cloud, args: list) -> None:
LOG.debug("Skipping hotplug install, udevadm not found")
return

extra_rules = (
cloud.datasource.extra_hotplug_udev_rules
if cloud.datasource.extra_hotplug_udev_rules is not None
else ""
)
if extra_rules:
extra_rules = "\n" + extra_rules
# This may need to turn into a distro property at some point
libexecdir = "/usr/libexec/cloud-init"
if not os.path.exists(libexecdir):
libexecdir = "/usr/lib/cloud-init"
util.write_file(
filename=HOTPLUG_UDEV_PATH,
content=HOTPLUG_UDEV_RULES_TEMPLATE.format(libexecdir=libexecdir),
content=HOTPLUG_UDEV_RULES_TEMPLATE.format(
extra_rules=extra_rules, libexecdir=libexecdir
),
)
subp.subp(["udevadm", "control", "--reload-rules"])
13 changes: 13 additions & 0 deletions cloudinit/sources/DataSourceEc2.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,13 @@ def skip_404_tag_errors(exception):
# Cloud platforms that support IMDSv2 style metadata server
IDMSV2_SUPPORTED_CLOUD_PLATFORMS = [CloudNames.AWS, CloudNames.ALIYUN]

# Only trigger hook-hotplug on NICs with Ec2 drivers. Avoid triggering
# it on docker virtual NICs and the like. LP: #1946003
_EXTRA_HOTPLUG_UDEV_RULES = """
ENV{ID_NET_DRIVER}=="vif|ena|ixgbevf", GOTO="cloudinit_hook"
GOTO="cloudinit_end"
"""


class DataSourceEc2(sources.DataSource):
dsname = "Ec2"
Expand Down Expand Up @@ -97,10 +104,16 @@ class DataSourceEc2(sources.DataSource):
}
}

extra_hotplug_udev_rules = _EXTRA_HOTPLUG_UDEV_RULES

def __init__(self, sys_cfg, distro, paths):
super(DataSourceEc2, self).__init__(sys_cfg, distro, paths)
self.metadata_address = None

def _unpickle(self, ci_pkl_version: int) -> None:
super()._unpickle(ci_pkl_version)
self.extra_hotplug_udev_rules = _EXTRA_HOTPLUG_UDEV_RULES

def _get_cloud_name(self):
"""Return the cloud name as identified during _get_data."""
return identify_platform()
Expand Down
5 changes: 5 additions & 0 deletions cloudinit/sources/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,9 @@ class DataSource(CloudInitPickleMixin, metaclass=abc.ABCMeta):
# in the updated metadata
skip_hotplug_detect = False

# Extra udev rules for cc_install_hotplug
extra_hotplug_udev_rules: Optional[str] = None

_ci_pkl_version = 1

def __init__(self, sys_cfg, distro: Distro, paths: Paths, ud_proc=None):
Expand Down Expand Up @@ -344,6 +347,8 @@ def _unpickle(self, ci_pkl_version: int) -> None:
e,
)
raise DatasourceUnpickleUserDataError() from e
if not hasattr(self, "extra_hotplug_udev_rules"):
self.extra_hotplug_udev_rules = None

def __str__(self):
return type_utils.obj_name(self)
Expand Down
2 changes: 1 addition & 1 deletion systemd/cloud-init-hotplugd.service
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Paired with cloud-init-hotplugd.socket to read from the FIFO
# /run/cloud-init/hook-hotplug-cmd which is created during a udev network
# add or remove event as processed by 10-cloud-init-hook-hotplug.rules.
# add or remove event as processed by 90-cloud-init-hook-hotplug.rules.

# On start, read args from the FIFO, process and provide structured arguments
# to `cloud-init devel hotplug-hook` which will setup or teardown network
Expand Down
2 changes: 1 addition & 1 deletion systemd/cloud-init-hotplugd.socket
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# cloud-init-hotplugd.socket listens on the FIFO file
# /run/cloud-init/hook-hotplug-cmd which is created during a udev network
# add or remove event as processed by 10-cloud-init-hook-hotplug.rules.
# add or remove event as processed by 90-cloud-init-hook-hotplug.rules.

# Known bug with an enforcing SELinux policy: LP: #1936229
[Unit]
Expand Down
28 changes: 26 additions & 2 deletions tests/integration_tests/modules/test_hotplug.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def test_hotplug_add_remove(client: IntegrationInstance):
log = client.read_from_file("/var/log/cloud-init.log")
assert "Exiting hotplug handler" not in log
assert client.execute(
"test -f /etc/udev/rules.d/10-cloud-init-hook-hotplug.rules"
"test -f /etc/udev/rules.d/90-cloud-init-hook-hotplug.rules"
).ok

# Add new NIC
Expand Down Expand Up @@ -109,7 +109,7 @@ def test_no_hotplug_in_userdata(client: IntegrationInstance):
log = client.read_from_file("/var/log/cloud-init.log")
assert "Exiting hotplug handler" not in log
assert client.execute(
"test -f /etc/udev/rules.d/10-cloud-init-hook-hotplug.rules"
"test -f /etc/udev/rules.d/90-cloud-init-hook-hotplug.rules"
).failed

# Add new NIC
Expand Down Expand Up @@ -219,3 +219,27 @@ def test_multi_nic_hotplug(setup_image, session_cloud: IntegrationCloud):
)
with contextlib.suppress(Exception):
ec2.release_address(AllocationId=allocation["AllocationId"])


@pytest.mark.skipif(PLATFORM != "ec2", reason="test is ec2 specific")
@pytest.mark.user_data(USER_DATA)
def test_no_hotplug_triggered_by_docker(client: IntegrationInstance):
# Install docker
r = client.execute("curl -fsSL https://get.docker.com | sh")
assert r.ok, r.stderr

# Start and stop a container
r = client.execute("docker run -dit --name ff ubuntu:focal")
assert r.ok, r.stderr
r = client.execute("docker stop ff")
assert r.ok, r.stderr

# Verify hotplug-hook was not called
log = client.read_from_file("/var/log/cloud-init.log")
assert "Exiting hotplug handler" not in log
assert "hotplug-hook" not in log

# Verify hotplug was enabled
assert "enabled" == client.execute(
"cloud-init devel hotplug-hook -s net query"
)
44 changes: 43 additions & 1 deletion tests/unittests/config/test_cc_install_hotplug.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
handle,
)
from cloudinit.event import EventScope, EventType
from cloudinit.sources.DataSourceEc2 import DataSourceEc2


@pytest.fixture()
Expand Down Expand Up @@ -51,6 +52,7 @@ def test_rules_installed_when_supported_and_enabled(
m_cloud.datasource.get_supported_events.return_value = {
EventScope.NETWORK: {EventType.HOTPLUG}
}
m_cloud.datasource.extra_hotplug_udev_rules = None

if libexec_exists:
libexecdir = "/usr/libexec/cloud-init"
Expand All @@ -61,7 +63,7 @@ def test_rules_installed_when_supported_and_enabled(
mocks.m_write.assert_called_once_with(
filename=HOTPLUG_UDEV_PATH,
content=HOTPLUG_UDEV_RULES_TEMPLATE.format(
libexecdir=libexecdir
extra_rules="", libexecdir=libexecdir
),
)
assert mocks.m_subp.call_args_list == [
Expand Down Expand Up @@ -127,3 +129,43 @@ def test_rules_not_installed_when_no_udevadm(self, mocks):
assert mocks.m_del.call_args_list == []
assert mocks.m_write.call_args_list == []
assert mocks.m_subp.call_args_list == []

def test_rules_installed_on_ec2(self, mocks):
mocks.m_which.return_value = "udevadm"
mocks.m_update_enabled.return_value = True
m_cloud = mock.MagicMock()
m_cloud.datasource.get_supported_events.return_value = {
EventScope.NETWORK: {EventType.HOTPLUG}
}
m_cloud.datasource.extra_hotplug_udev_rules = (
DataSourceEc2.extra_hotplug_udev_rules
)

with mock.patch("os.path.exists", return_value=True):
handle(None, {}, m_cloud, None)

udev_rules = """\
# Installed by cloud-init due to network hotplug userdata
ACTION!="add|remove", GOTO="cloudinit_end"
ENV{ID_NET_DRIVER}=="vif|ena|ixgbevf", GOTO="cloudinit_hook"
GOTO="cloudinit_end"
LABEL="cloudinit_hook"
SUBSYSTEM=="net", RUN+="/usr/libexec/cloud-init/hook-hotplug"
LABEL="cloudinit_end"
"""
mocks.m_write.assert_called_once_with(
filename=HOTPLUG_UDEV_PATH,
content=udev_rules,
)
assert mocks.m_subp.call_args_list == [
mock.call(
[
"udevadm",
"control",
"--reload-rules",
]
)
]
assert mocks.m_del.call_args_list == []

0 comments on commit a589842

Please sign in to comment.