Skip to content

Commit

Permalink
net/dhcp: handle timeouts for dhcpcd (#5022)
Browse files Browse the repository at this point in the history
I'm not sure 10 seconds is enough of a timeout.  Azure will retry DHCP
failures, but I expect the reduction from 300s/60s -> 10s will result in
unexpected failures for other clouds where there is no retry.

I see that dhcpcd offers a --timeout option which might be better than
relying on subprocess timeout?  Something to consider.

In the meantime:

- catch the TimeoutExpired exception and raise NoDhcpLeaseError to
  address below traceback

- update the "dhclient" logs to "dhcpcd"

```
Traceback (most recent call last):
  File "/usr/lib/python3/dist-packages/cloudinit/sources/DataSourceAzure.py", line 851, in _get_data
    crawled_data = util.log_time(
                   ^^^^^^^^^^^^^^
  File "/usr/lib/python3/dist-packages/cloudinit/util.py", line 2825, in log_time
    ret = func(*args, **kwargs)
          ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3/dist-packages/cloudinit/sources/helpers/azure.py", line 45, in impl
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3/dist-packages/cloudinit/sources/DataSourceAzure.py", line 630, in crawl_metadata
    self._setup_ephemeral_networking(timeout_minutes=timeout_minutes)
  File "/usr/lib/python3/dist-packages/cloudinit/sources/helpers/azure.py", line 45, in impl
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3/dist-packages/cloudinit/sources/DataSourceAzure.py", line 440, in _setup_ephemeral_networking
    lease = self._ephemeral_dhcp_ctx.obtain_lease()
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3/dist-packages/cloudinit/net/ephemeral.py", line 288, in obtain_lease
    self.lease = maybe_perform_dhcp_discovery(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3/dist-packages/cloudinit/net/dhcp.py", line 102, in maybe_perform_dhcp_discovery
    return distro.dhcp_client.dhcp_discovery(interface, dhcp_log_func, distro)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3/dist-packages/cloudinit/net/dhcp.py", line 649, in dhcp_discovery
    out, err = subp.subp(
               ^^^^^^^^^^
  File "/usr/lib/python3/dist-packages/cloudinit/subp.py", line 276, in subp
    out, err = sp.communicate(data, timeout=timeout)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.12/subprocess.py", line 1209, in communicate
    stdout, stderr = self._communicate(input, endtime, timeout)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.12/subprocess.py", line 2114, in _communicate
    self._check_timeout(endtime, orig_timeout, stdout, stderr)
  File "/usr/lib/python3.12/subprocess.py", line 1253, in _check_timeout
    raise TimeoutExpired(
subprocess.TimeoutExpired: Command '[b'/usr/sbin/dhcpcd', b'--ipv4only', b'--waitip', b'--persistent', b'--noarp', b'--script=/bin/true', b'eth0']' timed out after 10 seconds
```

Signed-off-by: Chris Patterson <cpatterson@microsoft.com>
  • Loading branch information
cjp256 committed Mar 8, 2024
1 parent 9c77b38 commit faeca64
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 4 deletions.
11 changes: 10 additions & 1 deletion cloudinit/net/dhcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import time
from contextlib import suppress
from io import StringIO
from subprocess import TimeoutExpired
from typing import Any, Callable, Dict, List, Optional, Tuple

import configobj
Expand Down Expand Up @@ -698,9 +699,17 @@ def dhcp_discovery(
return lease
raise NoDHCPLeaseError("No lease found")

except TimeoutExpired as error:
LOG.debug(
"dhcpcd timed out after %s seconds: stderr: %r stdout: %r",
error.timeout,
error.stderr,
error.stdout,
)
raise NoDHCPLeaseError from error
except subp.ProcessExecutionError as error:
LOG.debug(
"dhclient exited with code: %s stderr: %r stdout: %r",
"dhcpcd exited with code: %s stderr: %r stdout: %r",
error.exit_code,
error.stderr,
error.stdout,
Expand Down
54 changes: 51 additions & 3 deletions tests/unittests/net/test_dhcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import signal
import socket
import subprocess
from textwrap import dedent

import pytest
Expand Down Expand Up @@ -1132,9 +1133,11 @@ def test_get_newest_lease_file_from_distro_debian(self, *_):
# otherwise mock a reply with leasefile
@mock.patch(
"os.listdir",
side_effect=lambda x: []
if x == "/var/lib/NetworkManager"
else ["some_file", "!@#$-eth0.lease", "some_other_file"],
side_effect=lambda x: (
[]
if x == "/var/lib/NetworkManager"
else ["some_file", "!@#$-eth0.lease", "some_other_file"]
),
)
@mock.patch("os.path.getmtime", return_value=123.45)
def test_fallback_when_nothing_found(self, *_):
Expand Down Expand Up @@ -1284,6 +1287,51 @@ def test_dhcpcd_discovery_ib(
]
)

@mock.patch("cloudinit.net.dhcp.subp.which", return_value="/sbin/dhcpcd")
@mock.patch("cloudinit.net.dhcp.os.killpg")
@mock.patch("cloudinit.net.dhcp.subp.subp")
@mock.patch("cloudinit.util.load_json")
@mock.patch("cloudinit.util.load_binary_file")
@mock.patch("cloudinit.util.write_file")
def test_dhcpcd_discovery_timeout(
self,
m_write_file,
m_load_file,
m_loadjson,
m_subp,
m_remove,
m_which,
):
"""Verify dhcpcd timeout results in NoDHCPLeaseError exception."""
m_subp.side_effect = [
SubpResult("a=b", ""),
subprocess.TimeoutExpired(
"/sbin/dhcpcd", timeout=6, output="testout", stderr="testerr"
),
]
with pytest.raises(NoDHCPLeaseError):
Dhcpcd().dhcp_discovery("eth0", distro=MockDistro())

m_subp.assert_has_calls(
[
mock.call(
["ip", "link", "set", "dev", "eth0", "up"],
),
mock.call(
[
"/sbin/dhcpcd",
"--ipv4only",
"--waitip",
"--persistent",
"--noarp",
"--script=/bin/true",
"eth0",
],
timeout=Dhcpcd.timeout,
),
]
)


class TestMaybePerformDhcpDiscovery:
def test_none_and_missing_fallback(self):
Expand Down

0 comments on commit faeca64

Please sign in to comment.