Skip to content

Commit

Permalink
Add a test of some pathological cases
Browse files Browse the repository at this point in the history
  • Loading branch information
jwodder committed Mar 17, 2022
1 parent 54537c8 commit 8a444b1
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 4 deletions.
6 changes: 4 additions & 2 deletions tools/backups2datalad/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,8 +319,10 @@ async def sync_zarr(
summary = report.get_summary()
log.info("Zarr %s: %s; committing", asset.zarr, summary)
if zsync.last_timestamp is None:
assert delete_ts is not None
commit_ts = delete_ts
if delete_ts is None:
commit_ts = asset.created
else:
commit_ts = delete_ts
elif delete_ts is not None and zsync.last_timestamp < delete_ts:
commit_ts = delete_ts
else:
Expand Down
76 changes: 74 additions & 2 deletions tools/test_backups2datalad/test_zarr.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import logging
from operator import itemgetter
from pathlib import Path
from shutil import rmtree
from typing import Optional

from conftest import SampleDandiset
from dandi.utils import find_files
Expand All @@ -19,7 +21,9 @@
log = logging.getLogger("test_backups2datalad.test_zarr")


def check_zarr(source_path: Path, zarrds: Dataset, checksum: str) -> None:
def check_zarr(
source_path: Path, zarrds: Dataset, checksum: Optional[str] = None
) -> None:
zarr_entries = {
Path(f).relative_to(source_path).as_posix()
for f in find_files(
Expand All @@ -45,7 +49,10 @@ def check_zarr(source_path: Path, zarrds: Dataset, checksum: str) -> None:
assert p.is_symlink() and not p.is_file()
assert all(zarrds.repo.is_under_annex(list(sync_entries)))
assert (zarrds.pathobj / CHECKSUM_FILE).is_file()
assert (zarrds.pathobj / CHECKSUM_FILE).read_text().strip() == checksum
if checksum is not None:
assert (zarrds.pathobj / CHECKSUM_FILE).read_text().strip() == checksum
else:
assert (zarrds.pathobj / CHECKSUM_FILE).exists()
assert zarrds.repo.is_under_annex([CHECKSUM_FILE]) == [False]


Expand Down Expand Up @@ -191,3 +198,68 @@ def test_backup_zarr_delete_zarr(new_dandiset: SampleDandiset, tmp_path: Path) -
assert not (tmp_path / "ds" / dandiset_id / "sample.zarr").exists()
gitrepo = GitRepo(tmp_path / "ds" / dandiset_id)
assert gitrepo.get_commit_subject("HEAD") == "[backups2datalad] 1 file deleted"


def test_backup_zarr_pathological(
    new_dandiset: SampleDandiset, tmp_path: Path
) -> None:
    """Back up a Dandiset whose Zarr assets are arranged in unusual ways.

    Three assets are registered on the Dandiset before the backup runs:

    - ``sample.zarr``: a Zarr saved locally and uploaded normally.
    - ``link.zarr``: a second asset created with the same ``zarr_id`` as
      ``sample.zarr``.
    - ``empty.zarr``: an asset for a Zarr created through the API, to which
      this test uploads nothing.

    After ``update_from_backup()``, both Zarr backups are validated with
    ``check_zarr()`` and the three submodules of the Dandiset dataset are
    checked to point at the expected backup datasets.
    """
    local_zarr = new_dandiset.dspath / "sample.zarr"
    zarr.save(local_zarr, np.arange(1000), np.arange(1000, 0, -1))
    new_dandiset.upload()

    api = new_dandiset.client
    dandiset_id = new_dandiset.dandiset_id
    sample_asset = new_dandiset.dandiset.get_asset_by_path("sample.zarr")
    sample_zarr_id = sample_asset.zarr
    assets_endpoint = f"{new_dandiset.dandiset.version_api_path}assets/"

    # Second asset that reuses the Zarr ID of the uploaded sample.
    link_payload = {"metadata": {"path": "link.zarr"}, "zarr_id": sample_zarr_id}
    api.post(assets_endpoint, json=link_payload)

    # A Zarr created via the API; no entries are uploaded to it here.
    created = api.post(
        "/zarr/", json={"name": "empty.zarr", "dandiset": dandiset_id}
    )
    empty_zarr_id = created["zarr_id"]
    empty_payload = {"metadata": {"path": "empty.zarr"}, "zarr_id": empty_zarr_id}
    api.post(assets_endpoint, json=empty_payload)

    backup_config = Config(
        content_url_regex=r".*/blobs/",
        s3bucket="dandi-api-staging-dandisets",
        zarr_target=tmp_path / "zarrs",
    )
    datasetter = DandiDatasetter(
        dandi_client=new_dandiset.client,
        target_path=tmp_path / "ds",
        config=backup_config,
    )

    log.info("test_backup_zarr_pathological: Syncing Zarr dandiset")
    datasetter.update_from_backup([dandiset_id])

    zarr_root = tmp_path / "zarrs"

    sample_zarrds = Dataset(zarr_root / sample_zarr_id)
    check_zarr(
        local_zarr, sample_zarrds, checksum=sample_asset.get_digest().value
    )

    # The empty Zarr's backup is compared against a freshly made empty
    # directory, and no checksum value is passed to check_zarr().
    empty_source = tmp_path / "empty"
    empty_source.mkdir()
    empty_zarrds = Dataset(zarr_root / empty_zarr_id)
    check_zarr(empty_source, empty_zarrds)

    ds = Dataset(tmp_path / "ds" / dandiset_id)
    assert_repo_status(ds.path)

    # Unpacking requires exactly three submodules; sorted by path they come
    # out as empty.zarr, link.zarr, sample.zarr.
    by_path = sorted(ds.repo.get_submodules_(), key=itemgetter("path"))
    empty_mod, link_mod, sample_mod = by_path

    expectations = (
        (empty_mod, "empty.zarr", empty_zarrds),
        (link_mod, "link.zarr", sample_zarrds),
        (sample_mod, "sample.zarr", sample_zarrds),
    )
    for submodule, asset_path, backing in expectations:
        assert submodule["path"] == ds.pathobj / asset_path
        assert submodule["gitmodule_url"] == str(backing.pathobj)
        assert submodule["type"] == "dataset"
        assert submodule["gitshasum"] == backing.repo.format_commit("%H")

0 comments on commit 8a444b1

Please sign in to comment.