Skip to content

Commit

Permalink
add unit tests for s3 resource and associated bug fixes for bugs found…
Browse files Browse the repository at this point in the history
… while testing
  • Loading branch information
jfischer committed Aug 11, 2021
1 parent 1bb6cb0 commit b5e5877
Show file tree
Hide file tree
Showing 8 changed files with 433 additions and 12 deletions.
30 changes: 20 additions & 10 deletions dataworkspaces/resources/s3/s3_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,9 @@ def upload_file(self, local_path: str, rel_dest_path: str) -> None:
def does_subpath_exist(
self, subpath: str, must_be_file: bool = False, must_be_directory: bool = False
) -> bool:
if self.current_snapshot:
if subpath.startswith('.snapshots'):
return False
elif self.current_snapshot:
assert self.snapshot_fs
if not self.snapshot_fs.exists(subpath):
return False
Expand All @@ -149,12 +151,13 @@ def does_subpath_exist(
else:
return True
else:
if not self.fs.exists(subpath):
path = join(self.bucket_name, subpath)
if not self.fs.exists(path):
return False
elif must_be_file:
return self.fs.isfile(subpath)
return self.fs.isfile(path)
elif must_be_directory:
return not self.fs.isfile(subpath)
return not self.fs.isfile(path)
else:
return True

Expand All @@ -176,11 +179,17 @@ def ls(self, rel_path:str) -> List[str]:
assert self.snapshot_fs is not None
return self.snapshot_fs.ls(rel_path)
else:
# We call the S3FileSystem ls in this case. However,
# we need to exclude the snapshots. Also, there seems to be a bug where it
# includes the directory itself in some cases. We exclude that to prevent
# infinite loops when traversing the tree.
base = self.bucket_name + '/'
baselen = len(base)
return [
entry[baselen:] for entry in
self.fs.ls(join(self.bucket_name, rel_path))]
self.fs.ls(join(self.bucket_name, rel_path))
if not entry[baselen:].startswith('.snapshots')
and not entry[baselen:]==rel_path]

def delete_file(self, rel_path: str) -> None:
self._verify_no_snapshot()
Expand Down Expand Up @@ -219,6 +228,10 @@ def push(self) -> None:
def snapshot_precheck(self) -> None:
pass

def _ensure_fs_version_enabled(self):
if not self.fs.version_aware:
self.fs = S3FileSystem(version_aware=True)

def snapshot(self) -> Tuple[Optional[str], Optional[str]]:
if self.current_snapshot is not None:
# if a snapshot is already enabled, just return that one
Expand All @@ -228,8 +241,7 @@ def snapshot(self) -> Tuple[Optional[str], Optional[str]]:
with open(self.current_snapshot_file, 'w') as f:
f.write(self.current_snapshot)
self.snapshot_fs = S3Snapshot(versions)
if not self.fs.version_enabled:
self.fs = S3FileSystem(version_enabled=True)
self._ensure_fs_version_enabled()
return (self.current_snapshot, self.current_snapshot)

def restore_precheck(self, hashval):
Expand All @@ -246,9 +258,7 @@ def restore(self, hashval):
self.current_snapshot = hashval
with open(self.current_snapshot_file, 'w') as f:
f.write(hashval)
if not self.fs.version_enabled:
self.fs = S3FileSystem(version_enabled=True)

self._ensure_fs_version_enabled()

def delete_snapshot(
self, workspace_snapshot_hash: str, resource_restore_hash: str, relative_path: str
Expand Down
2 changes: 1 addition & 1 deletion dataworkspaces/resources/s3/snapfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def join(p, name):
if leaf in parent.subdirs:
return [join(join(parent.path, leaf), entry) for entry in parent.subdirs[leaf].entries]
else:
return [join(parent.path, path)]
return [path]

def exists(self, path):
parts = path.split("/")
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@

[mypy]
check_untyped_defs = true
2 changes: 2 additions & 0 deletions tests/.gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
./test
./remotes
./test_git_utils_data
/test_params.cfg
/utils_for_tests_data/
2 changes: 1 addition & 1 deletion tests/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ DATAWORKSPACES:=$(shell cd ../dataworkspaces; pwd)
help:
@echo targets are: test clean mypy pyflakes check help install-rclone-deb format-with-black

UNIT_TESTS=test_git_utils test_file_utils test_move_results test_snapshots test_push_pull test_local_files_resource test_hashtree test_lineage_utils test_git_fat_integration test_git_lfs test_lineage test_jupyter_kit test_sklearn_kit test_api test_wrapper_utils test_tensorflow test_scratch_dir test_export test_import test_rclone test_alternative_branch
UNIT_TESTS=test_git_utils test_file_utils test_move_results test_snapshots test_push_pull test_local_files_resource test_hashtree test_lineage_utils test_git_fat_integration test_git_lfs test_lineage test_jupyter_kit test_sklearn_kit test_api test_wrapper_utils test_tensorflow test_scratch_dir test_export test_import test_rclone test_alternative_branch test_s3_resource

MYPY_KITS=scikit_learn.py jupyter.py tensorflow.py wrapper_utils.py

Expand Down
23 changes: 23 additions & 0 deletions tests/README.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
=========================
Tests for Data Workspaces
=========================

This directory contains the test suite for DWS. To ensure you have all the dependencies, use
the Anaconda environment specified in dws-test-environment.yml. We use the oldest version of
Python we support to catch any accidental use of newer syntax constructs.

You can use the Makefile to drive the tests. ``make test`` will run the pyflakes checks,
mypy checks, and the unit test suite. Some tests use ``unittest.skipUnless`` to check
for any external requirements and skip the test if the requirements are not satisfied.

A few tests require special configuration:

* ``test_rclone.py`` has tests that need an ``rclone`` remote called *dws-test* to be
configured in the system's rclone configuration file.
* ``test_s3_resource.py`` has tests that require an s3 bucket. This bucket should be specified
  in the file ``test_params.cfg`` as follows:

[s3_resource]
s3_bucket=YOUR_BUCKET_NAME


0 comments on commit b5e5877

Please sign in to comment.