Skip to content

Commit

Permalink
Remove unused arguments in MediaStore (#862)
Browse files Browse the repository at this point in the history
  • Loading branch information
bengreeley committed Nov 9, 2022
1 parent 3dfd893 commit 911bdc1
Show file tree
Hide file tree
Showing 5 changed files with 10 additions and 39 deletions.
2 changes: 1 addition & 1 deletion openverse_catalog/dags/common/storage/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def __init__(
media_type="audio",
tsv_columns=None,
):
super().__init__(provider, output_file, output_dir, buffer_length, media_type)
super().__init__(provider, buffer_length, media_type)
self.columns = CURRENT_AUDIO_TSV_COLUMNS if tsv_columns is None else tsv_columns

def add_item(
Expand Down
2 changes: 1 addition & 1 deletion openverse_catalog/dags/common/storage/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def __init__(
media_type="image",
tsv_columns=None,
):
super().__init__(provider, output_file, output_dir, buffer_length, media_type)
super().__init__(provider, buffer_length, media_type)
self.columns = CURRENT_IMAGE_TSV_COLUMNS if tsv_columns is None else tsv_columns

def add_item(
Expand Down
26 changes: 6 additions & 20 deletions openverse_catalog/dags/common/storage/media.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,28 +49,21 @@ class MediaStore(metaclass=abc.ABCMeta):
date: Date String in the form YYYY-MM-DD. This is the date for
which data is being stored. If provided, it will be appended to
the tsv filename.
output_file: String giving a temporary .tsv filename (*not* the
full path) where the media info should be stored.
output_dir: String giving a path where `output_file` should be placed.
buffer_length: Integer giving the maximum number of media information rows
to store in memory before writing them to disk.
"""

def __init__(
self,
provider: Optional[str] = None,
output_file: Optional[str] = None,
output_dir: Optional[str] = None,
buffer_length: int = 100,
media_type: Optional[str] = "generic",
):
logger.info(f"Initialized {media_type} MediaStore with provider {provider}")
self.media_type = media_type
self.provider = provider
self.buffer_length = buffer_length
self.output_path = self._initialize_output_path(
output_dir, output_file, provider
)
self.output_path = self._initialize_output_path(provider)
self.columns = None
self._media_buffer = []
self._total_items = 0
Expand Down Expand Up @@ -157,8 +150,6 @@ def commit(self):

def _initialize_output_path(
self,
output_dir: Optional[str],
output_file: Optional[str],
provider: Optional[str],
version: Optional[str] = None,
) -> str:
Expand All @@ -170,23 +161,18 @@ def _initialize_output_path(
Returns:
Path of the tsv file to write media data pulled from providers
"""
if output_dir is None:
logger.info("No given output directory. Using OUTPUT_DIR from environment.")
output_dir = os.getenv("OUTPUT_DIR")

output_dir = os.getenv("OUTPUT_DIR")
if output_dir is None:
logger.warning(
"OUTPUT_DIR is not set in the environment. Output will go to /tmp."
)
output_dir = "/tmp"
if version is None:
version = CURRENT_VERSION[self.media_type]
if output_file is not None:
output_file = str(output_file)
else:
datetime_string = datetime.now().strftime("%Y%m%d%H%M%S")
output_file = (
f"{provider}_{self.media_type}_v{version}_{datetime_string}.tsv"
)

datetime_string = datetime.now().strftime("%Y%m%d%H%M%S")
output_file = f"{provider}_{self.media_type}_v{version}_{datetime_string}.tsv"

output_path = os.path.join(output_dir, output_file)
logger.info(f"Output path: {output_path}")
Expand Down
9 changes: 1 addition & 8 deletions tests/dags/common/storage/test_audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,15 +101,8 @@ def test_AudioStore_add_item_adds_multiple_audios_to_buffer():


def test_AudioStore_add_item_flushes_buffer(tmpdir):
output_file = "testing.tsv"
tmp_directory = tmpdir
output_dir = str(tmp_directory)
tmp_file = tmp_directory.join(output_file)
tmp_path_full = str(tmp_file)
audio_store = audio.AudioStore(
provider="testing_provider",
output_file=output_file,
output_dir=output_dir,
buffer_length=3,
)
audio_store.add_item(
Expand Down Expand Up @@ -137,7 +130,7 @@ def test_AudioStore_add_item_flushes_buffer(tmpdir):
license_info=PD_LICENSE_INFO,
)
assert len(audio_store._media_buffer) == 1
with open(tmp_path_full) as f:
with open(audio_store.output_path) as f:
lines = f.read().split("\n")
assert len(lines) == 4 # recall the last '\n' will create an empty line.

Expand Down
10 changes: 1 addition & 9 deletions tests/dags/common/storage/test_media.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,16 +89,8 @@ def test_MediaStore_includes_media_type_in_output_file_string():


def test_MediaStore_add_item_flushes_buffer(tmpdir):
output_file = "testing.tsv"
tmp_directory = tmpdir
output_dir = str(tmp_directory)
tmp_file = tmp_directory.join(output_file)
tmp_path_full = str(tmp_file)

image_store = image.ImageStore(
provider="testing_provider",
output_file=output_file,
output_dir=output_dir,
buffer_length=3,
)
image_store.add_item(
Expand Down Expand Up @@ -126,7 +118,7 @@ def test_MediaStore_add_item_flushes_buffer(tmpdir):
license_info=PD_LICENSE_INFO,
)
assert len(image_store._media_buffer) == 1
with open(tmp_path_full) as f:
with open(image_store.output_path) as f:
lines = f.read().split("\n")
assert len(lines) == 4 # recall the last '\n' will create an empty line.

Expand Down

0 comments on commit 911bdc1

Please sign in to comment.