Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import shutil
import tempfile
import typing
from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING, Optional, TypeVar

Expand All @@ -17,12 +17,6 @@
T = TypeVar("T")


@dataclass
class DeleteAllResult:
deleted_count: int
freed_space_bytes: float


class ObjectSerializerDisk(ObjectSerializerBase[T]):
"""Disk-backed storage for arbitrary python objects. Serialization is handled by `torch.save` and `torch.load`.

Expand All @@ -35,6 +29,12 @@ def __init__(self, output_dir: Path, ephemeral: bool = False):
self._ephemeral = ephemeral
self._base_output_dir = output_dir
self._base_output_dir.mkdir(parents=True, exist_ok=True)

if self._ephemeral:
# Remove dangling tempdirs that might have been left over from an earlier unplanned shutdown.
for temp_dir in filter(Path.is_dir, self._base_output_dir.glob("tmp*")):
shutil.rmtree(temp_dir)

# Must specify `ignore_cleanup_errors` to avoid fatal errors during cleanup on Windows
self._tempdir = (
tempfile.TemporaryDirectory(dir=self._base_output_dir, ignore_cleanup_errors=True) if ephemeral else None
Expand Down
14 changes: 14 additions & 0 deletions tests/test_object_serializer_disk.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,20 @@ def test_obj_serializer_ephemeral_writes_to_tempdir(tmp_path: Path):
assert not Path(tmp_path, obj_1_name).exists()


def test_obj_serializer_ephemeral_deletes_dangling_tempdirs_on_init(tmp_path: Path):
tempdir = tmp_path / "tmpdir"
tempdir.mkdir()
ObjectSerializerDisk[MockDataclass](tmp_path, ephemeral=True)
assert not tempdir.exists()


def test_obj_serializer_does_not_delete_tempdirs_on_init(tmp_path: Path):
tempdir = tmp_path / "tmpdir"
tempdir.mkdir()
ObjectSerializerDisk[MockDataclass](tmp_path, ephemeral=False)
assert tempdir.exists()


def test_obj_serializer_disk_different_types(tmp_path: Path):
obj_serializer_1 = ObjectSerializerDisk[MockDataclass](tmp_path)
obj_1 = MockDataclass(foo="bar")
Expand Down