diff --git a/DAGs.md b/DAGs.md index da5334b495..7d5a93f3d9 100644 --- a/DAGs.md +++ b/DAGs.md @@ -282,6 +282,8 @@ Output: TSV file containing the image, the respective Notes: https://freesound.org/apiv2/search/text' No rate limit specified. + This script can be run either to ingest the full dataset or + as a dated DAG. ## `image_data_refresh` diff --git a/openverse_catalog/dags/providers/provider_api_scripts/freesound.py b/openverse_catalog/dags/providers/provider_api_scripts/freesound.py index 210105b4ab..058306996e 100644 --- a/openverse_catalog/dags/providers/provider_api_scripts/freesound.py +++ b/openverse_catalog/dags/providers/provider_api_scripts/freesound.py @@ -8,294 +8,259 @@ Notes: https://freesound.org/apiv2/search/text' No rate limit specified. + This script can be run either to ingest the full dataset or + as a dated DAG. """ -import copy import functools import logging from datetime import datetime import requests from airflow.models import Variable +from common import constants from common.licenses.licenses import get_license_info from common.loader import provider_details as prov -from common.requester import DelayedRequester -from common.storage.audio import AudioStore +from providers.provider_api_scripts.provider_data_ingester import ProviderDataIngester from requests.exceptions import ConnectionError, SSLError from retry import retry -LIMIT = 150 -DELAY = 1 # in seconds -RETRIES = 3 -HOST = "freesound.org" -ENDPOINT = f"https://{HOST}/apiv2/search/text" -PROVIDER = prov.FREESOUND_DEFAULT_PROVIDER -API_KEY = Variable.get("API_KEY_FREESOUND", default_var="not_set") -FLAKY_EXCEPTIONS = (SSLError, ConnectionError) - -HEADERS = { - "Accept": "application/json", -} -DEFAULT_QUERY_PARAMS = { - "format": "json", - "token": API_KEY, - "query": "", - "page_size": LIMIT, - "fields": "id,url,name,tags,description,created,license,type,download," - "filesize,bitrate,bitdepth,duration,samplerate,pack,username," - "num_downloads,avg_rating,num_ratings,geotag,previews", -} - -delayed_requester = DelayedRequester(DELAY) -audio_store = AudioStore(provider=PROVIDER) - -logging.basicConfig( - format="%(asctime)s - %(name)s - %(levelname)s: %(message)s", - level=logging.INFO, -) logger = logging.getLogger(__name__) -preview_bitrates = { - "preview-hq-mp3": 128000, - "preview-lq-mp3": 64000, - "preview-hq-ogg": 192000, - "preview-lq-ogg": 80000, -} - - -def main(date="all"): - """This script pulls the data for a given date from the Freesound, - and writes it into a .TSV file to be eventually read - into our DB. - - Required Arguments: - - date: Date String in the form YYYY-MM-DD. This is the date for - which running the script will pull data. - """ - - logger.info("Begin: Freesound script") - licenses = ["Attribution", "Attribution Noncommercial", "Creative Commons 0"] - for license_name in licenses: - audio_count = _get_items(license_name, date) - logger.info(f"Audios for {license_name} pulled: {audio_count}") - logger.info("Terminated!") - - -def _get_query_params( - license_name="", - page_number=1, - default_query_params=None, -): - default_query_params = default_query_params or DEFAULT_QUERY_PARAMS - return { - **default_query_params, - "page": str(page_number), - "license": license_name, + +class FreesoundDataIngester(ProviderDataIngester): + batch_limit = 150 + host = "freesound.org" + endpoint = f"https://{host}/apiv2/search/text" + providers = {"audio": prov.FREESOUND_DEFAULT_PROVIDER} + flaky_exceptions = (SSLError, ConnectionError) + preferred_preview = "preview-hq-mp3" + preview_bitrates = { + "preview-hq-mp3": 128000, + "preview-lq-mp3": 64000, + "preview-hq-ogg": 192000, + "preview-lq-ogg": 80000, } + headers = { + "Accept": "application/json", + } -def _get_items(license_name, date): - item_count = 0 - page_number = 1 - should_continue = True - default_query_params = copy.deepcopy(DEFAULT_QUERY_PARAMS) - try: - start_date = datetime.strftime( - datetime.fromisoformat(date), "%Y-%m-%dT%H:%M:%SZ" - ) - except ValueError: - start_date = "*" - default_query_params["filter"] = f"created:[{start_date} TO NOW]" - while should_continue: - query_param = _get_query_params( - default_query_params=default_query_params, - license_name=license_name, - page_number=page_number, - ) - batch_data = _get_batch_json(query_param=query_param) - if batch_data: - item_count = _process_item_batch(batch_data) - page_number += 1 + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.api_key = Variable.get("API_KEY_FREESOUND") + + def get_media_type(self, record: dict) -> str: + return constants.AUDIO + + def get_next_query_params(self, prev_query_params: dict | None, **kwargs) -> dict: + if not prev_query_params: + start_date = "*" + # Allow self.date to be undefined, necessary for the first full, successful + # run of Freesound but can be changed to dated afterwards. + if self.date: + start_date = datetime.strftime( + datetime.fromisoformat(self.date), "%Y-%m-%dT%H:%M:%SZ" + ) + return { + "format": "json", + "token": self.api_key, + "query": "", + "page_size": self.batch_limit, + "fields": ",".join( + [ + "id", + "url", + "name", + "tags", + "description", + "created", + "license", + "type", + "download", + "filesize", + "bitrate", + "bitdepth", + "duration", + "samplerate", + "pack", + "username", + "num_downloads", + "avg_rating", + "num_ratings", + "geotag", + "previews", + ] + ), + "filter": f"created:[{start_date} TO NOW]", + "page": 1, + } else: - should_continue = False - return item_count + return {**prev_query_params, "page": prev_query_params["page"] + 1} - -def _get_batch_json(endpoint=ENDPOINT, headers=None, retries=RETRIES, query_param=None): - if headers is None: - headers = HEADERS - response_json = delayed_requester.get_response_json( - endpoint, retries, query_param, headers=headers - ) - if response_json is None: - return None - else: - results = response_json.get("results") - return results - - -def _process_item_batch(items_batch): - for item in items_batch: - # Freesound sometimes returns results that are just "None" - if item is None: - continue - item_meta_data = _extract_audio_data(item) - if item_meta_data is None: - continue - audio_store.add_item(**item_meta_data) - return audio_store.total_items - - -def _extract_audio_data(media_data): - """Extracts meta data about the audio file - Freesound does not have audio thumbnails""" - foreign_landing_url = media_data.get("url") - if not foreign_landing_url: - return None - - foreign_identifier = media_data.get("id") - if not foreign_identifier: - return None - - item_license = _get_license(media_data) - if item_license is None: - return None - - # We use the mp3-hq preview url as `audio_url` as the main url - # for playing on the frontend, - # and the actual uploaded file as an alt_file that is available - # for download (and requires a user to be authenticated to download) - try: - main_audio, alt_files = _get_audio_files(media_data) - except FLAKY_EXCEPTIONS: - logger.warning(f"Unable to get file size for {foreign_landing_url}, skipping") - return None - if main_audio is None: + def get_batch_data(self, response_json): + if results := response_json.get("results"): + # Freesound sometimes returns results that are just "None", filter these out + return [item for item in results if item is not None] return None - creator, creator_url = _get_creator_data(media_data) - duration = int(media_data.get("duration") * 1000) - set_foreign_id, audio_set, set_url = _get_audio_set_info(media_data) - return { - "title": media_data.get("name"), - "creator": creator, - "creator_url": creator_url, - "foreign_identifier": foreign_identifier, - "foreign_landing_url": foreign_landing_url, - "duration": duration, - "license_info": item_license, - "meta_data": _get_metadata(media_data), - "raw_tags": media_data.get("tags"), - "set_foreign_id": set_foreign_id, - "audio_set": audio_set, - "set_url": set_url, - "alt_files": alt_files, - # audio_url, filetype, bit_rate - **main_audio, - } - - -def _get_audio_set_info(media_data): - # set id, set name, set url - set_url = media_data.get("pack") - if set_url is not None: - set_id, set_name = _get_set_info(set_url) - return set_id, set_name, set_url - else: - return None, None, None - - -@functools.lru_cache(maxsize=1024) -def _get_set_info(set_url): - response_json = delayed_requester.get_response_json( - set_url, - 3, - query_params={"token": API_KEY}, - ) - set_id = response_json.get("id") - set_name = response_json.get("name") - return set_id, set_name - - -def _get_preview_filedata(preview_type, preview_url): - return { - "url": preview_url, - "filetype": preview_type.split("-")[-1], - "bit_rate": preview_bitrates[preview_type], - } + @staticmethod + def _get_creator_data(item): + if creator := item.get("username"): + creator = creator.strip() + creator_url = f"https://freesound.org/people/{creator}/" + else: + creator_url = None + return creator, creator_url + + @staticmethod + def _get_metadata(item): + metadata = {} + fields = [ + "description", + "num_downloads", + "avg_rating", + "num_ratings", + "geotag", + "download", + ] + for field in fields: + if field_value := item.get(field): + metadata[field] = field_value + return metadata + + @staticmethod + def _get_license(item): + item_license = get_license_info(license_url=item.get("license")) + + if item_license.license is None: + return None + return item_license + + @functools.lru_cache(maxsize=1024) + def _get_set_info(self, set_url): + response_json = self.get_response_json( + query_params={"token": self.api_key}, + endpoint=set_url, + ) + set_id = response_json.get("id") + set_name = response_json.get("name") + return set_id, set_name + + def _get_audio_set_info(self, media_data): + # set id, set name, set url + set_url = media_data.get("pack") + if set_url is not None: + set_id, set_name = self._get_set_info(set_url) + return set_id, set_name, set_url + else: + return None, None, None + + @retry(flaky_exceptions, tries=3, delay=1, backoff=2) + def _get_audio_file_size(self, url): + """ + Get the content length of a provided URL. + Freesound can be finicky, so we want to retry it a few times on + these conditions: + * SSLError - 'EOF occurred in violation of protocol (_ssl.c:1129)' + * ConnectionError - '[Errno 113] No route to host' + + Both of these seem transient and may be the result of some odd behavior on the + Freesound API end. We have an API key that's supposed to be maxed out, so + I can't imagine it's throttling (aetherunbound). + + TODO(obulat): move filesize detection to the polite crawler + """ + return int(requests.head(url).headers["content-length"]) + + def _get_audio_files( + self, media_data + ) -> tuple[dict, list[dict]] | tuple[None, None]: + previews = media_data.get("previews") + # If there are no previews, then we will not be able to play the file + if not previews: + return None, None + + # If our preferred preview type is not present, skip this audio + if not (preview_url := previews.get(self.preferred_preview)): + return None, None + + main_file = { + "audio_url": preview_url, + "filetype": self.preferred_preview.split("-")[-1], + "bit_rate": FreesoundDataIngester.preview_bitrates[self.preferred_preview], + "filesize": self._get_audio_file_size(preview_url), + } + # These are the original files, needs auth for downloading. + # bit_rate in kilobytes, converted to bytes + alt_files = [ + { + "url": media_data.get("download"), + "bit_rate": int(media_data.get("bitrate")) * 1000, + "sample_rate": int(media_data.get("samplerate")), + "filetype": media_data.get("type"), + "filesize": media_data.get("filesize"), + } + ] + return main_file, alt_files + + def get_record_data(self, media_data: dict) -> dict | list[dict] | None: + """ + Extracts metadata about the audio file. + Freesound does not have audio thumbnails. + """ + foreign_landing_url = media_data.get("url") + if not foreign_landing_url: + return None + + foreign_identifier = media_data.get("id") + if not foreign_identifier: + return None + + item_license = self._get_license(media_data) + if item_license is None: + return None + + # We use the mp3-hq preview url as `audio_url` as the main url + # for playing on the frontend, + # and the actual uploaded file as an alt_file that is available + # for download (and requires a user to be authenticated to download) + try: + main_audio, alt_files = self._get_audio_files(media_data) + except self.flaky_exceptions: + logger.warning( + f"Unable to get file size for {foreign_landing_url}, skipping" + ) + return None + if main_audio is None: + return None + + creator, creator_url = self._get_creator_data(media_data) + duration = int(media_data.get("duration") * 1000) + set_foreign_id, audio_set, set_url = self._get_audio_set_info(media_data) + return { + "title": media_data.get("name"), + "creator": creator, + "creator_url": creator_url, + "foreign_identifier": foreign_identifier, + "foreign_landing_url": foreign_landing_url, + "duration": duration, + "license_info": item_license, + "meta_data": self._get_metadata(media_data), + "raw_tags": media_data.get("tags"), + "set_foreign_id": set_foreign_id, + "audio_set": audio_set, + "set_url": set_url, + "alt_files": alt_files, + # audio_url, filetype, bit_rate + **main_audio, + } -@retry(FLAKY_EXCEPTIONS, tries=3, delay=1, backoff=2) -def _get_audio_file_size(url): - """ - Get the content length of a provided URL. - Freesound can be finicky, so we want to retry it a few times on these conditions: - * SSLError - 'EOF occurred in violation of protocol (_ssl.c:1129)' - * ConnectionError - '[Errno 113] No route to host' - - Both of these seem transient and may be the result of some odd behavior on the - Freesound API end. We have an API key that's supposed to be maxed out, so I can't - imagine it's throttling (aetherunbound). - - TODO(obulat): move filesize detection to the polite crawler - """ - return int(requests.head(url).headers["content-length"]) - - -def _get_audio_files(media_data): - # This is the original file, needs auth for downloading. - # bit_rate in kilobytes, converted to bytes - alt_files = [ - { - "url": media_data.get("download"), - "bit_rate": int(media_data.get("bitrate")) * 1000, - "sample_rate": int(media_data.get("samplerate")), - "filetype": media_data.get("type"), - "filesize": media_data.get("filesize"), - } - ] - previews = media_data.get("previews") - # If there are no previews, then we will not be able to play the file - if not previews: - return None - main_file = _get_preview_filedata("preview-hq-mp3", previews["preview-hq-mp3"]) - main_file["audio_url"] = main_file.pop("url") - main_file["filesize"] = _get_audio_file_size(main_file["audio_url"]) - return main_file, alt_files - - -def _get_creator_data(item): - if creator := item.get("username"): - creator = creator.strip() - creator_url = f"https://freesound.org/people/{creator}/" - else: - creator_url = None - return creator, creator_url - - -def _get_metadata(item): - metadata = {} - fields = [ - "description", - "num_downloads", - "avg_rating", - "num_ratings", - "geotag", - "download", - ] - for field in fields: - if field_value := item.get(field): - metadata[field] = field_value - return metadata - - -def _get_license(item): - item_license = get_license_info(license_url=item.get("license")) - - if item_license.license is None: - return None - return item_license +def main(): + logger.info("Begin: Freesound provider script") + ingester = FreesoundDataIngester() + ingester.ingest_records() if __name__ == "__main__": diff --git a/openverse_catalog/dags/providers/provider_workflows.py b/openverse_catalog/dags/providers/provider_workflows.py index 588aa5818a..eceb0a6328 100644 --- a/openverse_catalog/dags/providers/provider_workflows.py +++ b/openverse_catalog/dags/providers/provider_workflows.py @@ -6,6 +6,7 @@ from providers.provider_api_scripts.brooklyn_museum import BrooklynMuseumDataIngester from providers.provider_api_scripts.cleveland_museum import ClevelandDataIngester from providers.provider_api_scripts.finnish_museums import FinnishMuseumsDataIngester +from providers.provider_api_scripts.freesound import FreesoundDataIngester from providers.provider_api_scripts.inaturalist import INaturalistDataIngester from providers.provider_api_scripts.jamendo import JamendoDataIngester from providers.provider_api_scripts.metropolitan_museum import MetMuseumDataIngester @@ -150,6 +151,7 @@ def __post_init__(self): ), ProviderWorkflow( provider_script="freesound", + ingestion_callable=FreesoundDataIngester, media_types=("audio",), ), ProviderWorkflow( diff --git a/tests/dags/providers/provider_api_scripts/test_freesound.py b/tests/dags/providers/provider_api_scripts/test_freesound.py index 27b6217e84..0ac1b0db0f 100644 --- a/tests/dags/providers/provider_api_scripts/test_freesound.py +++ b/tests/dags/providers/provider_api_scripts/test_freesound.py @@ -4,52 +4,44 @@ import pytest from common.licenses.licenses import LicenseInfo -from providers.provider_api_scripts import freesound +from providers.provider_api_scripts.freesound import FreesoundDataIngester RESOURCES = Path(__file__).parent.resolve() / "resources/freesound" +fsd = FreesoundDataIngester() AUDIO_FILE_SIZE = 16359 @pytest.fixture(autouse=True) def freesound_module(): - old_get_set = freesound._get_set_info - freesound._get_set_info = lambda x: ("foo", x) + old_get_set = fsd._get_set_info + fsd._get_set_info = lambda x: ("foo", x) yield - freesound._get_set_info = old_get_set + fsd._get_set_info = old_get_set @pytest.fixture def file_size_patch(): - with patch( - "providers.provider_api_scripts.freesound._get_audio_file_size" - ) as get_file_size_mock: + with patch.object(fsd, "_get_audio_file_size") as get_file_size_mock: get_file_size_mock.return_value = AUDIO_FILE_SIZE yield @pytest.fixture def audio_data(): - AUDIO_DATA_EXAMPLE = RESOURCES / "audio_data_example.json" - with open(AUDIO_DATA_EXAMPLE) as f: + audio_data_example = RESOURCES / "audio_data_example.json" + with open(audio_data_example) as f: yield json.load(f) -def test_get_audio_pages_returns_correctly_with_none_json(): - expect_result = None - with patch.object( - freesound.delayed_requester, "get_response_json", return_value=None - ): - actual_result = freesound._get_batch_json() - assert actual_result == expect_result +def test_get_audio_pages_returns_correctly_with_no_data(): + actual_result = fsd.get_batch_data({}) + assert actual_result is None -def test_get_audio_pages_returns_correctly_with_no_results(): - expect_result = None - with patch.object( - freesound.delayed_requester, "get_response_json", return_value={} - ): - actual_result = freesound._get_batch_json() +def test_get_audio_pages_returns_correctly_with_empty_list(): + expect_result = [] + actual_result = fsd.get_batch_data({"results": [None, None, None]}) assert actual_result == expect_result @@ -57,7 +49,7 @@ def test_get_audio_pages_returns_correctly_with_no_results(): "exception_type", [ # These are fine - *freesound.FLAKY_EXCEPTIONS, + *FreesoundDataIngester.flaky_exceptions, # This should raise immediately pytest.param(ValueError, marks=pytest.mark.raises(exception=ValueError)), ], @@ -67,108 +59,81 @@ def test_get_audio_file_size_retries_and_does_not_raise(exception_type, audio_da # Patch the sleep function so it doesn't take long with patch("requests.head") as head_patch, patch("time.sleep"): head_patch.side_effect = exception_type("whoops") - actual_result = freesound._extract_audio_data(audio_data) + actual_result = fsd.get_record_data(audio_data) assert head_patch.call_count == 3 assert actual_result == expected_result -def test_get_query_params_adds_page_number(): - actual_qp = freesound._get_query_params(page_number=2) - assert actual_qp["page"] == str(2) - - -def test_get_query_params_leaves_other_keys(): - actual_qp = freesound._get_query_params( - page_number=200, default_query_params={"test": "value"} - ) - assert actual_qp["test"] == "value" - assert len(actual_qp.keys()) == 3 +def test_get_query_params_increments_page_number(): + first_qp = fsd.get_next_query_params(None) + second_qp = fsd.get_next_query_params(first_qp) + first_page = first_qp.pop("page") + second_page = second_qp.pop("page") + # Should be the same beyond that + assert first_qp == second_qp + assert second_page == first_page + 1 def test_get_items(file_size_patch): with open(RESOURCES / "page.json") as f: first_response = json.load(f) - with patch.object(freesound, "_get_batch_json", side_effect=[first_response, []]): - expected_audio_count = 6 - actual_audio_count = freesound._get_items( - license_name="Attribution", date="all" - ) - assert expected_audio_count == actual_audio_count - - -@pytest.mark.parametrize("has_nones", [False, True]) -def test_process_item_batch_handles_example_batch( - has_nones, audio_data, file_size_patch -): - items_batch = [audio_data] - if has_nones: - items_batch = [None, None, audio_data, None] - with patch.object(freesound.audio_store, "add_item", return_value=1) as mock_add: - freesound._process_item_batch(items_batch) - mock_add.assert_called_once() - _, actual_call_args = mock_add.call_args_list[0] - expected_call_args = { - "alt_files": [ - { - "bit_rate": 1381000, - "filesize": 107592, - "filetype": "wav", - "sample_rate": 44100, - "url": "https://freesound.org/apiv2/sounds/415362/download/", - } - ], - "audio_set": "https://freesound.org/apiv2/packs/23434/", + expected_audio_count = 6 + actual_audio_count = fsd.process_batch(first_response) + assert actual_audio_count == expected_audio_count + + +def test_get_audio_files_handles_example_audio_data(audio_data, file_size_patch): + actual = fsd._get_audio_files(audio_data) + expected = ( + { "audio_url": "https://freesound.org/data/previews/415/415362_6044691-hq.mp3", "bit_rate": 128000, - "creator": "owly-bee", - "creator_url": "https://freesound.org/people/owly-bee/", - "duration": 608, "filesize": AUDIO_FILE_SIZE, "filetype": "mp3", - "foreign_identifier": 415362, - "foreign_landing_url": "https://freesound.org/people/owly-bee/sounds/415362/", - "license_info": LicenseInfo( - license="by", - version="3.0", - url="https://creativecommons.org/licenses/by/3.0/", - raw_url="http://creativecommons.org/licenses/by/3.0/", - ), - "meta_data": { - "description": "A disinterested noise in a somewhat low tone.", - "download": "https://freesound.org/apiv2/sounds/415362/download/", - "num_downloads": 164, - }, - "raw_tags": ["eh", "disinterest", "low", "uh", "voice", "uncaring"], - "set_foreign_id": "foo", - "set_url": "https://freesound.org/apiv2/packs/23434/", - "title": "Ehh disinterested.wav", - } - assert actual_call_args == expected_call_args + }, + [ + { + "bit_rate": 1381000, + "filesize": 107592, + "filetype": "wav", + "sample_rate": 44100, + "url": "https://freesound.org/apiv2/sounds/415362/download/", + } + ], + ) + assert actual == expected -def test_extract_audio_data_returns_none_when_no_foreign_id(audio_data): - audio_data.pop("id", None) - actual_audio_info = freesound._extract_audio_data(audio_data) - expected_audio_info = None - assert actual_audio_info is expected_audio_info +def test_get_audio_files_returns_none_when_missing_previews(audio_data): + audio_data.pop("previews", None) + actual = fsd._get_audio_files(audio_data) + assert actual == (None, None) -def test_extract_audio_data_returns_none_when_no_audio_url(audio_data): - audio_data.pop("url", None) - audio_data.pop("download", None) - actual_audio_info = freesound._extract_audio_data(audio_data) - assert actual_audio_info is None +def test_get_audio_files_returns_none_when_missing_preferred_preview(audio_data): + audio_data["previews"].pop(fsd.preferred_preview) + actual = fsd._get_audio_files(audio_data) + assert actual == (None, None) -def test_extract_audio_data_returns_none_when_no_license(audio_data): - audio_data.pop("license", None) - actual_audio_info = freesound._extract_audio_data(audio_data) - assert actual_audio_info is None +@pytest.mark.parametrize( + "missing_fields", + [ + ("id",), + ("url", "download"), + ("license",), + ], +) +def test_get_record_data_returns_none_when_missing_data(missing_fields, audio_data): + for field in missing_fields: + audio_data.pop(field, None) + actual = fsd.get_record_data(audio_data) + assert actual is None def test_get_audio_set_info(audio_data): - set_foreign_id, audio_set, set_url = freesound._get_audio_set_info(audio_data) + set_foreign_id, audio_set, set_url = fsd._get_audio_set_info(audio_data) expected_audio_set_info = ( "foo", "https://freesound.org/apiv2/packs/23434/", @@ -178,7 +143,7 @@ def test_get_audio_set_info(audio_data): def test_get_creator_data(audio_data): - actual_creator, actual_creator_url = freesound._get_creator_data(audio_data) + actual_creator, actual_creator_url = fsd._get_creator_data(audio_data) expected_creator = "owly-bee" expected_creator_url = "https://freesound.org/people/owly-bee/" @@ -188,14 +153,14 @@ def test_get_creator_data(audio_data): def test_get_creator_data_returns_none_when_no_artist(audio_data): audio_data.pop("username", None) - actual_creator, actual_creator_url = freesound._get_creator_data(audio_data) + actual_creator, actual_creator_url = fsd._get_creator_data(audio_data) assert actual_creator is None assert actual_creator_url is None def test_extract_audio_data_handles_example_dict(audio_data, file_size_patch): - actual_audio_info = freesound._extract_audio_data(audio_data) + actual_audio_info = fsd.get_record_data(audio_data) expected_audio_info = { "alt_files": [ { @@ -236,7 +201,7 @@ def test_extract_audio_data_handles_example_dict(audio_data, file_size_patch): def test_get_tags(audio_data, file_size_patch): - item_data = freesound._extract_audio_data(audio_data) + item_data = fsd.get_record_data(audio_data) actual_tags = item_data["raw_tags"] expected_tags = ["eh", "disinterest", "low", "uh", "voice", "uncaring"] assert expected_tags == actual_tags