Skip to content

Commit

Permalink
Add ConnectionError to acceptable flaky exceptions for Freesound (#413)
Browse files Browse the repository at this point in the history
  • Loading branch information
AetherUnbound committed Mar 18, 2022
1 parent fd68b9e commit e8400cd
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 8 deletions.
18 changes: 13 additions & 5 deletions openverse_catalog/dags/providers/provider_api_scripts/freesound.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from common.loader import provider_details as prov
from common.requester import DelayedRequester
from common.storage.audio import AudioStore
from requests.exceptions import SSLError
from requests.exceptions import ConnectionError, SSLError
from retry import retry


Expand All @@ -31,6 +31,7 @@
ENDPOINT = f"https://{HOST}/apiv2/search/text"
PROVIDER = prov.FREESOUND_DEFAULT_PROVIDER
API_KEY = Variable.get("API_KEY_FREESOUND", default_var="not_set")
FLAKY_EXCEPTIONS = (SSLError, ConnectionError)

HEADERS = {
"Accept": "application/json",
Expand Down Expand Up @@ -167,7 +168,7 @@ def _extract_audio_data(media_data):
# for download (and requires a user to be authenticated to download)
try:
main_audio, alt_files = _get_audio_files(media_data)
except SSLError:
except FLAKY_EXCEPTIONS:
logger.warning(f"Unable to get file size for {foreign_landing_url}, skipping")
return None
if main_audio is None:
Expand Down Expand Up @@ -225,13 +226,20 @@ def _get_preview_filedata(preview_type, preview_url):
}


@retry(SSLError, tries=3, delay=1, backoff=2)
@retry(FLAKY_EXCEPTIONS, tries=3, delay=1, backoff=2)
def _get_audio_file_size(url):
"""
Get the content length of a provided URL.
Freesound can be a bit finicky, so we want to retry it a few times
Freesound can be finicky, so we want to retry it a few times on these conditions:
* SSLError - 'EOF occurred in violation of protocol (_ssl.c:1129)'
* ConnectionError - '[Errno 113] No route to host'
Both of these seem transient and may be the result of some odd behavior on the
Freesound API end. We have an API key that's supposed to be maxed out, so I can't
imagine it's throttling (aetherunbound).
TODO(obulat): move filesize detection to the polite crawler
"""
# TODO(obulat): move filesize detection to the polite crawler
return int(requests.head(url).headers["content-length"])


Expand Down
14 changes: 11 additions & 3 deletions tests/dags/providers/provider_api_scripts/test_freesound.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import pytest
from common.licenses.licenses import LicenseInfo
from providers.provider_api_scripts import freesound
from requests.exceptions import SSLError


RESOURCES = Path(__file__).parent.resolve() / "resources/freesound"
Expand Down Expand Up @@ -54,11 +53,20 @@ def test_get_audio_pages_returns_correctly_with_no_results():
assert actual_result == expect_result


def test_get_audio_file_size_retries_and_does_not_raise(audio_data):
@pytest.mark.parametrize(
"exception_type",
[
# These are fine
*freesound.FLAKY_EXCEPTIONS,
# This should raise immediately
pytest.param(ValueError, marks=pytest.mark.raises(exception=ValueError)),
],
)
def test_get_audio_file_size_retries_and_does_not_raise(exception_type, audio_data):
expected_result = None
# Patch the sleep function so it doesn't take long
with patch("requests.head") as head_patch, patch("time.sleep"):
head_patch.side_effect = SSLError("whoops")
head_patch.side_effect = exception_type("whoops")
actual_result = freesound._extract_audio_data(audio_data)

assert head_patch.call_count == 3
Expand Down

0 comments on commit e8400cd

Please sign in to comment.