Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Azure helper: Increase Azure Endpoint HTTP retries #619

Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
eec09f2
Azure helper: Modify AzureEndpointHttpClient to use http_with_retries
johnsonshi Oct 19, 2020
f5978b7
Azure: Add skeleton UTs for http_with_retries
johnsonshi Oct 19, 2020
178c697
Azure helper: Change http_with_retries so that it is not a static met…
johnsonshi Oct 19, 2020
8f98780
Azure helper: Add UT class TestAzureHelperHttpWithRetries
johnsonshi Oct 20, 2020
d3be1a4
Merge branch 'master' into azure-increase-wireserver-interaction-retries
johnsonshi Oct 20, 2020
c4146a9
Merge branch 'master' into azure-increase-wireserver-interaction-retries
johnsonshi Oct 20, 2020
254e0ed
Azure helper: Update http_with_retries kwargs and messaging
johnsonshi Oct 20, 2020
3f6e920
Azure helper: Update UTs and Azure helper http_with_retries helper
johnsonshi Oct 21, 2020
298a90a
Azure helper: http_with_retry use url arg instead of url kwarg
johnsonshi Oct 21, 2020
8c01f7d
Azure helper: Update UTs
johnsonshi Oct 22, 2020
fe30eec
Merge branch 'master' into azure-increase-wireserver-interaction-retries
mitechie Nov 2, 2020
74ec5c6
Merge branch 'master' into azure-increase-wireserver-interaction-retries
johnsonshi Nov 18, 2020
8dfe059
azure: Add comment on http_with_retries about custom telemetry logging
johnsonshi Nov 18, 2020
d8211a3
azure: Improved logging for http_with_retries
johnsonshi Nov 18, 2020
81b3a9e
azure http_with_retries: Improved 1-based index iteration for retries
johnsonshi Nov 18, 2020
2acf8de
Merge branch 'master' into azure-increase-wireserver-interaction-retries
OddBloke Nov 18, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 48 additions & 4 deletions cloudinit/sources/helpers/azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,51 @@ def _get_dhcp_endpoint_option_name():
return azure_endpoint


@azure_ds_telemetry_reporter
def http_with_retries(url, **kwargs) -> str:
exc = None
johnsonshi marked this conversation as resolved.
Show resolved Hide resolved

max_readurl_attempts = 240
default_readurl_timeout = 5
periodic_logging_attempts = 12

if 'timeout' not in kwargs:
kwargs['timeout'] = default_readurl_timeout

# remove kwargs that cause url_helper.readurl to retry,
# since we are already implementing our own retry logic.
if 'retries' in kwargs:
kwargs.pop('retries', None)
LOG.warning(
'retries kwarg passed in for communication with Azure endpoint.')
johnsonshi marked this conversation as resolved.
Show resolved Hide resolved
if 'infinite' in kwargs:
kwargs.pop('infinite', None)
LOG.warning(
'infinite kwarg passed in for communication with Azure endpoint.')
johnsonshi marked this conversation as resolved.
Show resolved Hide resolved

for attempt in range(max_readurl_attempts):
johnsonshi marked this conversation as resolved.
Show resolved Hide resolved
try:
ret = url_helper.readurl(url, **kwargs)

report_diagnostic_event(
'Successful HTTP request with Azure endpoint %s after '
'%d attempts' % (url, attempt + 1),
johnsonshi marked this conversation as resolved.
Show resolved Hide resolved
logger_func=LOG.debug)

return ret

except Exception as e:
exc = e
if (attempt + 1) % periodic_logging_attempts == 0:
johnsonshi marked this conversation as resolved.
Show resolved Hide resolved
report_diagnostic_event(
'Failed HTTP request with Azure endpoint %s during '
'attempt %d with exception: %s' %
(url, attempt + 1, e),
johnsonshi marked this conversation as resolved.
Show resolved Hide resolved
logger_func=LOG.debug)

raise exc


class AzureEndpointHttpClient:

headers = {
Expand All @@ -276,16 +321,15 @@ def get(self, url, secure=False):
if secure:
headers = self.headers.copy()
headers.update(self.extra_secure_headers)
return url_helper.readurl(url, headers=headers,
timeout=5, retries=10, sec_between=5)
return http_with_retries(url, headers=headers)

def post(self, url, data=None, extra_headers=None):
headers = self.headers
if extra_headers is not None:
headers = self.headers.copy()
headers.update(extra_headers)
return url_helper.readurl(url, data=data, headers=headers,
timeout=5, retries=10, sec_between=5)
return http_with_retries(
url, data=data, headers=headers)


class InvalidGoalStateXMLException(Exception):
Expand Down
227 changes: 190 additions & 37 deletions tests/unittests/test_datasource/test_azure_helper.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# This file is part of cloud-init. See LICENSE file for license information.

import copy
import os
import re
import unittest
Expand Down Expand Up @@ -281,29 +282,25 @@ def setUp(self):
super(TestAzureEndpointHttpClient, self).setUp()
patches = ExitStack()
self.addCleanup(patches.close)

self.readurl = patches.enter_context(
mock.patch.object(azure_helper.url_helper, 'readurl'))
patches.enter_context(
mock.patch.object(azure_helper.time, 'sleep', mock.MagicMock()))
self.m_http_with_retries = patches.enter_context(
mock.patch.object(azure_helper, 'http_with_retries'))

def test_non_secure_get(self):
client = azure_helper.AzureEndpointHttpClient(mock.MagicMock())
url = 'MyTestUrl'
response = client.get(url, secure=False)
self.assertEqual(1, self.readurl.call_count)
self.assertEqual(self.readurl.return_value, response)
self.assertEqual(1, self.m_http_with_retries.call_count)
self.assertEqual(self.m_http_with_retries.return_value, response)
self.assertEqual(
mock.call(url, headers=self.regular_headers,
timeout=5, retries=10, sec_between=5),
self.readurl.call_args)
mock.call(url, headers=self.regular_headers),
self.m_http_with_retries.call_args)

def test_non_secure_get_raises_exception(self):
client = azure_helper.AzureEndpointHttpClient(mock.MagicMock())
self.readurl.side_effect = SentinelException
url = 'MyTestUrl'
with self.assertRaises(SentinelException):
client.get(url, secure=False)
self.m_http_with_retries.side_effect = SentinelException
self.assertRaises(SentinelException, client.get, url, secure=False)
self.assertEqual(1, self.m_http_with_retries.call_count)

def test_secure_get(self):
url = 'MyTestUrl'
Expand All @@ -315,39 +312,37 @@ def test_secure_get(self):
})
client = azure_helper.AzureEndpointHttpClient(m_certificate)
response = client.get(url, secure=True)
self.assertEqual(1, self.readurl.call_count)
self.assertEqual(self.readurl.return_value, response)
self.assertEqual(1, self.m_http_with_retries.call_count)
self.assertEqual(self.m_http_with_retries.return_value, response)
self.assertEqual(
mock.call(url, headers=expected_headers,
timeout=5, retries=10, sec_between=5),
self.readurl.call_args)
mock.call(url, headers=expected_headers),
self.m_http_with_retries.call_args)

def test_secure_get_raises_exception(self):
url = 'MyTestUrl'
client = azure_helper.AzureEndpointHttpClient(mock.MagicMock())
self.readurl.side_effect = SentinelException
with self.assertRaises(SentinelException):
client.get(url, secure=True)
self.m_http_with_retries.side_effect = SentinelException
self.assertRaises(SentinelException, client.get, url, secure=True)
self.assertEqual(1, self.m_http_with_retries.call_count)

def test_post(self):
m_data = mock.MagicMock()
url = 'MyTestUrl'
client = azure_helper.AzureEndpointHttpClient(mock.MagicMock())
response = client.post(url, data=m_data)
self.assertEqual(1, self.readurl.call_count)
self.assertEqual(self.readurl.return_value, response)
self.assertEqual(1, self.m_http_with_retries.call_count)
self.assertEqual(self.m_http_with_retries.return_value, response)
self.assertEqual(
mock.call(url, data=m_data, headers=self.regular_headers,
timeout=5, retries=10, sec_between=5),
self.readurl.call_args)
mock.call(url, data=m_data, headers=self.regular_headers),
self.m_http_with_retries.call_args)

def test_post_raises_exception(self):
m_data = mock.MagicMock()
url = 'MyTestUrl'
client = azure_helper.AzureEndpointHttpClient(mock.MagicMock())
self.readurl.side_effect = SentinelException
with self.assertRaises(SentinelException):
client.post(url, data=m_data)
self.m_http_with_retries.side_effect = SentinelException
self.assertRaises(SentinelException, client.post, url, data=m_data)
self.assertEqual(1, self.m_http_with_retries.call_count)

def test_post_with_extra_headers(self):
url = 'MyTestUrl'
Expand All @@ -356,21 +351,179 @@ def test_post_with_extra_headers(self):
client.post(url, extra_headers=extra_headers)
expected_headers = self.regular_headers.copy()
expected_headers.update(extra_headers)
self.assertEqual(1, self.readurl.call_count)
self.assertEqual(1, self.m_http_with_retries.call_count)
self.assertEqual(
mock.call(mock.ANY, data=mock.ANY, headers=expected_headers,
timeout=5, retries=10, sec_between=5),
self.readurl.call_args)
mock.call(url, data=mock.ANY, headers=expected_headers),
self.m_http_with_retries.call_args)

def test_post_with_sleep_with_extra_headers_raises_exception(self):
m_data = mock.MagicMock()
url = 'MyTestUrl'
extra_headers = {'test': 'header'}
client = azure_helper.AzureEndpointHttpClient(mock.MagicMock())
self.readurl.side_effect = SentinelException
with self.assertRaises(SentinelException):
client.post(
url, data=m_data, extra_headers=extra_headers)
self.m_http_with_retries.side_effect = SentinelException
self.assertRaises(
SentinelException, client.post,
url, data=m_data, extra_headers=extra_headers)
self.assertEqual(1, self.m_http_with_retries.call_count)


class TestAzureHelperHttpWithRetries(CiTestCase):

with_logs = True

max_readurl_attempts = 240
default_readurl_timeout = 5
periodic_logging_attempts = 12

def setUp(self):
super(TestAzureHelperHttpWithRetries, self).setUp()
patches = ExitStack()
self.addCleanup(patches.close)

self.m_readurl = patches.enter_context(
mock.patch.object(
azure_helper.url_helper, 'readurl', mock.MagicMock()))
patches.enter_context(
mock.patch.object(azure_helper.time, 'sleep', mock.MagicMock()))

def test_http_with_retries(self):
self.m_readurl.return_value = 'TestResp'
self.assertEqual(
azure_helper.http_with_retries('testurl'),
self.m_readurl.return_value)
self.assertEqual(self.m_readurl.call_count, 1)

def test_http_with_retries_propagates_readurl_exc_and_logs_exc(
self):
self.m_readurl.side_effect = SentinelException

self.assertRaises(
SentinelException, azure_helper.http_with_retries, 'testurl')
self.assertEqual(self.m_readurl.call_count, self.max_readurl_attempts)

self.assertIsNotNone(
re.search(
r'Failed HTTP request with Azure endpoint \S* during '
r'attempt \d+ with exception: \S*',
self.logs.getvalue()))
self.assertIsNone(
re.search(
r'Successful HTTP request with Azure endpoint \S* after '
r'\d+ attempts',
self.logs.getvalue()))

def test_http_with_retries_delayed_success_due_to_temporary_readurl_exc(
self):
self.m_readurl.side_effect = \
[SentinelException] * self.periodic_logging_attempts + \
['TestResp']
self.m_readurl.return_value = 'TestResp'

response = azure_helper.http_with_retries('testurl')
self.assertEqual(
response,
self.m_readurl.return_value)
self.assertEqual(
self.m_readurl.call_count,
self.periodic_logging_attempts + 1)

def test_http_with_retries_long_delay_logs_periodic_failure_msg(self):
self.m_readurl.side_effect = \
[SentinelException] * self.periodic_logging_attempts + \
['TestResp']
self.m_readurl.return_value = 'TestResp'

azure_helper.http_with_retries('testurl')

self.assertEqual(
self.m_readurl.call_count,
self.periodic_logging_attempts + 1)
self.assertIsNotNone(
re.search(
r'Failed HTTP request with Azure endpoint \S* during '
r'attempt \d+ with exception: \S*',
self.logs.getvalue()))
self.assertIsNotNone(
re.search(
r'Successful HTTP request with Azure endpoint \S* after '
r'\d+ attempts',
self.logs.getvalue()))

def test_http_with_retries_short_delay_does_not_log_periodic_failure_msg(
self):
self.m_readurl.side_effect = \
[SentinelException] * \
(self.periodic_logging_attempts - 1) + \
['TestResp']
self.m_readurl.return_value = 'TestResp'

azure_helper.http_with_retries('testurl')
self.assertEqual(
self.m_readurl.call_count,
self.periodic_logging_attempts)

self.assertIsNone(
re.search(
r'Failed HTTP request with Azure endpoint \S* during '
r'attempt \d+ with exception: \S*',
self.logs.getvalue()))
self.assertIsNotNone(
re.search(
r'Successful HTTP request with Azure endpoint \S* after '
r'\d+ attempts',
self.logs.getvalue()))

def test_http_with_retries_calls_url_helper_readurl_with_args_kwargs(self):
testurl = mock.MagicMock()
kwargs = {
'headers': mock.MagicMock(),
'data': mock.MagicMock(),
# timeout kwarg should not be modified or deleted if present
'timeout': mock.MagicMock()
}
azure_helper.http_with_retries(testurl, **kwargs)
self.m_readurl.assert_called_once_with(testurl, **kwargs)

def test_http_with_retries_adds_timeout_kwarg_if_not_present(self):
testurl = mock.MagicMock()
kwargs = {
'headers': mock.MagicMock(),
'data': mock.MagicMock()
}
expected_kwargs = copy.deepcopy(kwargs)
expected_kwargs['timeout'] = self.default_readurl_timeout

azure_helper.http_with_retries(testurl, **kwargs)
self.m_readurl.assert_called_once_with(testurl, **expected_kwargs)

def test_http_with_retries_deletes_retries_kwargs_passed_in(
self):
"""http_with_retries already implements retry logic,
so url_helper.readurl should not have retries.
http_with_retries should delete kwargs that
cause url_helper.readurl to retry.
"""
testurl = mock.MagicMock()
kwargs = {
'headers': mock.MagicMock(),
'data': mock.MagicMock(),
'timeout': mock.MagicMock(),
'retries': mock.MagicMock(),
'infinite': mock.MagicMock()
}
expected_kwargs = copy.deepcopy(kwargs)
expected_kwargs.pop('retries', None)
expected_kwargs.pop('infinite', None)

azure_helper.http_with_retries(testurl, **kwargs)
self.m_readurl.assert_called_once_with(testurl, **expected_kwargs)
self.assertIn(
'retries kwarg passed in for communication with Azure endpoint.',
self.logs.getvalue())
self.assertIn(
'infinite kwarg passed in for communication with Azure endpoint.',
self.logs.getvalue())


class TestOpenSSLManager(CiTestCase):
Expand Down