From 911bdc1b0f6abcf8189265b54c60c67c0accee51 Mon Sep 17 00:00:00 2001 From: Ben Greeley Date: Wed, 9 Nov 2022 14:57:22 -0700 Subject: [PATCH] Remove unused arguments in MediaStore (#862) Fixes https://github.com/WordPress/openverse-catalog/issues/729 --- .../dags/common/storage/audio.py | 2 +- .../dags/common/storage/image.py | 2 +- .../dags/common/storage/media.py | 26 +++++-------------- tests/dags/common/storage/test_audio.py | 9 +------ tests/dags/common/storage/test_media.py | 10 +------ 5 files changed, 10 insertions(+), 39 deletions(-) diff --git a/openverse_catalog/dags/common/storage/audio.py b/openverse_catalog/dags/common/storage/audio.py index b98890fb1f..3f331684de 100644 --- a/openverse_catalog/dags/common/storage/audio.py +++ b/openverse_catalog/dags/common/storage/audio.py @@ -37,7 +37,7 @@ def __init__( media_type="audio", tsv_columns=None, ): - super().__init__(provider, output_file, output_dir, buffer_length, media_type) + super().__init__(provider, buffer_length, media_type) self.columns = CURRENT_AUDIO_TSV_COLUMNS if tsv_columns is None else tsv_columns def add_item( diff --git a/openverse_catalog/dags/common/storage/image.py b/openverse_catalog/dags/common/storage/image.py index b3f54d454b..9f90cf7791 100644 --- a/openverse_catalog/dags/common/storage/image.py +++ b/openverse_catalog/dags/common/storage/image.py @@ -37,7 +37,7 @@ def __init__( media_type="image", tsv_columns=None, ): - super().__init__(provider, output_file, output_dir, buffer_length, media_type) + super().__init__(provider, buffer_length, media_type) self.columns = CURRENT_IMAGE_TSV_COLUMNS if tsv_columns is None else tsv_columns def add_item( diff --git a/openverse_catalog/dags/common/storage/media.py b/openverse_catalog/dags/common/storage/media.py index d73073ad4f..bc9146c312 100644 --- a/openverse_catalog/dags/common/storage/media.py +++ b/openverse_catalog/dags/common/storage/media.py @@ -49,9 +49,6 @@ class MediaStore(metaclass=abc.ABCMeta): date: Date String in the form YYYY-MM-DD. This is the date for which data is being stored. If provided, it will be appended to the tsv filename. - output_file: String giving a temporary .tsv filename (*not* the - full path) where the media info should be stored. - output_dir: String giving a path where `output_file` should be placed. buffer_length: Integer giving the maximum number of media information rows to store in memory before writing them to disk. """ @@ -59,8 +56,6 @@ class MediaStore(metaclass=abc.ABCMeta): def __init__( self, provider: Optional[str] = None, - output_file: Optional[str] = None, - output_dir: Optional[str] = None, buffer_length: int = 100, media_type: Optional[str] = "generic", ): @@ -68,9 +63,7 @@ def __init__( self.media_type = media_type self.provider = provider self.buffer_length = buffer_length - self.output_path = self._initialize_output_path( - output_dir, output_file, provider - ) + self.output_path = self._initialize_output_path(provider) self.columns = None self._media_buffer = [] self._total_items = 0 @@ -157,8 +150,6 @@ def commit(self): def _initialize_output_path( self, - output_dir: Optional[str], - output_file: Optional[str], provider: Optional[str], version: Optional[str] = None, ) -> str: @@ -170,9 +161,8 @@ def _initialize_output_path( Returns: Path of the tsv file to write media data pulled from providers """ - if output_dir is None: - logger.info("No given output directory. Using OUTPUT_DIR from environment.") - output_dir = os.getenv("OUTPUT_DIR") + + output_dir = os.getenv("OUTPUT_DIR") if output_dir is None: logger.warning( "OUTPUT_DIR is not set in the environment. Output will go to /tmp." @@ -180,13 +170,9 @@ def _initialize_output_path( output_dir = "/tmp" if version is None: version = CURRENT_VERSION[self.media_type] - if output_file is not None: - output_file = str(output_file) - else: - datetime_string = datetime.now().strftime("%Y%m%d%H%M%S") - output_file = ( - f"{provider}_{self.media_type}_v{version}_{datetime_string}.tsv" - ) + + datetime_string = datetime.now().strftime("%Y%m%d%H%M%S") + output_file = f"{provider}_{self.media_type}_v{version}_{datetime_string}.tsv" output_path = os.path.join(output_dir, output_file) logger.info(f"Output path: {output_path}") diff --git a/tests/dags/common/storage/test_audio.py b/tests/dags/common/storage/test_audio.py index a1942c5f5c..4e128cd372 100644 --- a/tests/dags/common/storage/test_audio.py +++ b/tests/dags/common/storage/test_audio.py @@ -101,15 +101,8 @@ def test_AudioStore_add_item_adds_multiple_audios_to_buffer(): def test_AudioStore_add_item_flushes_buffer(tmpdir): - output_file = "testing.tsv" - tmp_directory = tmpdir - output_dir = str(tmp_directory) - tmp_file = tmp_directory.join(output_file) - tmp_path_full = str(tmp_file) audio_store = audio.AudioStore( provider="testing_provider", - output_file=output_file, - output_dir=output_dir, buffer_length=3, ) audio_store.add_item( @@ -137,7 +130,7 @@ def test_AudioStore_add_item_flushes_buffer(tmpdir): license_info=PD_LICENSE_INFO, ) assert len(audio_store._media_buffer) == 1 - with open(tmp_path_full) as f: + with open(audio_store.output_path) as f: lines = f.read().split("\n") assert len(lines) == 4 # recall the last '\n' will create an empty line. diff --git a/tests/dags/common/storage/test_media.py b/tests/dags/common/storage/test_media.py index 1ae4ef3cca..d2b7ce5128 100644 --- a/tests/dags/common/storage/test_media.py +++ b/tests/dags/common/storage/test_media.py @@ -89,16 +89,8 @@ def test_MediaStore_includes_media_type_in_output_file_string(): def test_MediaStore_add_item_flushes_buffer(tmpdir): - output_file = "testing.tsv" - tmp_directory = tmpdir - output_dir = str(tmp_directory) - tmp_file = tmp_directory.join(output_file) - tmp_path_full = str(tmp_file) - image_store = image.ImageStore( provider="testing_provider", - output_file=output_file, - output_dir=output_dir, buffer_length=3, ) image_store.add_item( @@ -126,7 +118,7 @@ def test_MediaStore_add_item_flushes_buffer(tmpdir): license_info=PD_LICENSE_INFO, ) assert len(image_store._media_buffer) == 1 - with open(tmp_path_full) as f: + with open(image_store.output_path) as f: lines = f.read().split("\n") assert len(lines) == 4 # recall the last '\n' will create an empty line.