fix: Make wait_for_url respect explicit arguments
TheRealFalcon committed Mar 8, 2024
1 parent 29edefe commit 9c77b38
Showing 2 changed files with 115 additions and 17 deletions.
48 changes: 31 additions & 17 deletions cloudinit/url_helper.py
@@ -473,14 +473,14 @@ def dual_stack(

 def wait_for_url(
     urls,
-    max_wait=None,
-    timeout=None,
+    max_wait: float = float("inf"),
+    timeout: Optional[float] = None,
     status_cb: Callable = LOG.debug,  # some sources use different log levels
     headers_cb: Optional[Callable] = None,
     headers_redact=None,
-    sleep_time: int = 1,
+    sleep_time: Optional[float] = None,
     exception_cb: Optional[Callable] = None,
-    sleep_time_cb: Optional[Callable[[Any, int], int]] = None,
+    sleep_time_cb: Optional[Callable[[Any, float], float]] = None,
     request_method: str = "",
     connect_synchronously: bool = True,
     async_delay: float = 0.150,
@@ -496,10 +496,15 @@ def wait_for_url(
     headers_cb: call method with single argument of url to get headers
                 for request.
     headers_redact: a list of header names to redact from the log
+    sleep_time: Amount of time to sleep between retries. If this and
+                sleep_time_cb are None, the sleep time defaults to 1 second
+                and increases by 1 second every 5 tries. Cannot be
+                specified along with `sleep_time_cb`.
     exception_cb: call method with 2 arguments 'msg' (per status_cb) and
                   'exception', the exception that occurred.
     sleep_time_cb: call method with 2 arguments (response, loop_n) that
-                   generates the next sleep time.
+                   generates the next sleep time. Cannot be specified
+                   along with `sleep_time`.
     request_method: indicate the type of HTTP request, GET, PUT, or POST
     connect_synchronously: if false, enables executing requests in parallel
     async_delay: delay before parallel metadata requests, see RFC 6555
@@ -520,17 +525,19 @@ def wait_for_url(
     data host (169.254.169.254) may be firewalled off Entirely for a system,
     meaning that the connection will block forever unless a timeout is set.
-    A value of None for max_wait will retry indefinitely.
+    The default value for max_wait will retry indefinitely.
     """

-    def default_sleep_time(_, loop_number: int) -> int:
-        return int(loop_number / 5) + 1
+    def default_sleep_time(_, loop_number: int) -> float:
+        return sleep_time if sleep_time is not None else loop_number // 5 + 1

-    def timeup(max_wait, start_time):
+    def timeup(max_wait: float, start_time: float, sleep_time: float = 0):
         """Check if time is up based on start time and max wait"""
-        if max_wait is None:
+        if max_wait in (float("inf"), None):
             return False
-        return (max_wait <= 0) or (time.time() - start_time > max_wait)
+        return (max_wait <= 0) or (
+            time.time() - start_time + sleep_time > max_wait
+        )

     def handle_url_response(response, url):
         """Map requests response code/contents to internal "UrlError" type"""
@@ -641,6 +648,8 @@ def read_url_parallel(start_time, timeout, exc_cb, log_cb):
         return out

     start_time = time.time()
+    if sleep_time and sleep_time_cb:
+        raise ValueError("sleep_time and sleep_time_cb are mutually exclusive")

     # Dual-stack support factored out serial and parallel execution paths to
     # allow the retry loop logic to exist separately from the http calls.
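
A minimal sketch of the new guard in action; the call shape is taken from the signature above, and the metadata URL is only a placeholder:

from cloudinit.url_helper import wait_for_url

# Supplying both knobs now fails fast instead of silently ignoring one.
wait_for_url(
    urls=["http://169.254.169.254/"],
    sleep_time=3,
    sleep_time_cb=lambda _response, loop_n: loop_n + 1,
)
# ValueError: sleep_time and sleep_time_cb are mutually exclusive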
@@ -656,25 +665,30 @@ def read_url_parallel(start_time, timeout, exc_cb, log_cb):
     loop_n: int = 0
     response = None
     while True:
-        sleep_time = calculate_sleep_time(response, loop_n)
+        current_sleep_time = calculate_sleep_time(response, loop_n)

         url = do_read_url(start_time, timeout, exception_cb, status_cb)
         if url:
             address, response = url
             return (address, response.contents)

-        if timeup(max_wait, start_time):
+        if timeup(max_wait, start_time, current_sleep_time):
             break

         loop_n = loop_n + 1
         LOG.debug(
-            "Please wait %s seconds while we wait to try again", sleep_time
+            "Please wait %s seconds while we wait to try again",
+            current_sleep_time,
         )
-        time.sleep(sleep_time)
+        time.sleep(current_sleep_time)

         # shorten timeout to not run way over max_time
-        # timeout=0.0 causes exceptions in urllib, set to None if zero
-        timeout = int((start_time + max_wait) - time.time()) or None
+        current_time = time.time()
+        if timeout and current_time + timeout > start_time + max_wait:
+            timeout = max_wait - (current_time - start_time)
+            if timeout <= 0:
+                # We've already exceeded our max_wait. Time to bail.
+                break

     LOG.error("Timed out, no response from urls: %s", urls)
     return False, None
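
A worked pass through the clamping above, using the values from test_shortened_timeout in the test diff (illustration only):

# start_time=0, max_wait=10, timeout=9: after one 9-second request and a
# 0-second sleep, current_time is 9. Since 9 + 9 > 0 + 10, the second
# attempt's timeout is clamped to 10 - (9 - 0) = 1 second.
start_time, max_wait, timeout = 0.0, 10.0, 9.0
current_time = 9.0  # simulated time.time() after the first attempt
if timeout and current_time + timeout > start_time + max_wait:
    timeout = max_wait - (current_time - start_time)
assert timeout == 1.0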
84 changes: 84 additions & 0 deletions tests/unittests/test_url_helper.py
@@ -1,4 +1,5 @@
 # This file is part of cloud-init. See LICENSE file for license information.
+# pylint: disable=attribute-defined-outside-init

 import logging
 import re
@@ -471,6 +472,21 @@ class TestUrlHelper:
     fail = "FAIL"
     event = Event()

+    @pytest.fixture
+    def retry_mocks(self, mocker):
+        self.mock_time_value = 0
+        m_readurl = mocker.patch(
+            f"{M_PATH}readurl", side_effect=self.readurl_side_effect
+        )
+        m_sleep = mocker.patch(
+            f"{M_PATH}time.sleep", side_effect=self.sleep_side_effect
+        )
+        mocker.patch(f"{M_PATH}time.time", side_effect=self.time_side_effect)
+
+        yield m_readurl, m_sleep
+
+        self.mock_time_value = 0
+
     @classmethod
     def response_wait(cls, _request):
         cls.event.wait(0.1)
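
The fixture above builds a simulated clock: the patched readurl advances it by the request timeout, time.sleep advances it by the sleep interval, and time.time simply reads it (M_PATH is the module-path prefix already defined in this test file). A condensed, standalone sketch of the same pattern:

class FakeClock:
    """Single counter standing in for real time in retry tests."""

    def __init__(self) -> None:
        self.now = 0.0

    def time(self) -> float:
        return self.now

    def sleep(self, seconds: float) -> None:
        self.now += seconds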
Expand Down Expand Up @@ -562,3 +578,71 @@ def test_timeout(self, caplog):
assert re.search(
r"open 'https:\/\/sleep1\/'.*Timed out", caplog.text, re.DOTALL
)

def test_explicit_arguments(self, retry_mocks):
"""Ensure that explicit arguments are respected"""
m_readurl, m_sleep = retry_mocks
wait_for_url(
urls=["http://localhost/"],
max_wait=23,
timeout=5,
sleep_time=3,
)

assert len(m_readurl.call_args_list) == 3
assert len(m_sleep.call_args_list) == 2

for readurl_call in m_readurl.call_args_list:
assert readurl_call[1]["timeout"] == 5
for sleep_call in m_sleep.call_args_list:
assert sleep_call[0][0] == 3

# Call 1 starts 0
# Call 2 starts at 8-ish after 5 second timeout and 3 second sleep
# Call 3 starts at 16-ish for same reasons
# The 5 second timeout puts us at 21-ish and now we break
# because 21-ish + the sleep time puts us over max wait of 23
assert pytest.approx(self.mock_time_value) == 21

def test_shortened_timeout(self, retry_mocks):
"""Test that we shorten the last timeout to align with max_wait"""
m_readurl, _m_sleep = retry_mocks
wait_for_url(
urls=["http://localhost/"], max_wait=10, timeout=9, sleep_time=0
)

assert len(m_readurl.call_args_list) == 2
assert m_readurl.call_args_list[-1][1]["timeout"] == pytest.approx(1)

def test_default_sleep_time(self, retry_mocks):
"""Test default sleep behavior when not specified"""
_m_readurl, m_sleep = retry_mocks
wait_for_url(
urls=["http://localhost/"],
max_wait=50,
timeout=1,
)

expected_sleep_times = [1] * 5 + [2] * 5 + [3] * 5
actual_sleep_times = [
m_sleep.call_args_list[i][0][0]
for i in range(len(m_sleep.call_args_list))
]
assert actual_sleep_times == expected_sleep_times

# These side effect methods are a way of having a somewhat predictable
# output for time.time(). Otherwise, we have to track too many calls
# to time.time() and unrelated changes to code being called could cause
# these tests to fail.
# 0.0000001 is added to simulate additional execution time but keep it
# small enough for pytest.approx() to work
def sleep_side_effect(self, sleep_time):
self.mock_time_value += sleep_time + 0.0000001

def time_side_effect(self):
return self.mock_time_value

def readurl_side_effect(self, *args, **kwargs):
if "timeout" in kwargs:
self.mock_time_value += kwargs["timeout"] + 0.0000001
raise UrlError("test")
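
For reference, a standalone sketch (not part of the test suite) of the schedule test_default_sleep_time expects: the fallback is loop_number // 5 + 1, and max_wait=50 with 1-second timeouts leaves room for exactly 15 sleeps.

def default_schedule(loop_number: int) -> float:
    # Mirrors wait_for_url's fallback when sleep_time is not given
    return loop_number // 5 + 1

print([default_schedule(n) for n in range(15)])
# -> [1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3]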
