From 5e8545240ff439963a4d4fd14a38f2da3cc592f2 Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Mon, 12 Jul 2021 16:51:11 -0700 Subject: [PATCH 01/33] remove unused options/fixtures + separate ds/storage fixtures --- conftest.py | 286 ++++++------------ hub/api/tests/test_api.py | 16 +- hub/api/tests/test_api_with_compression.py | 8 +- hub/constants.py | 10 + .../storage/tests/test_storage_provider.py | 21 +- hub/core/tests/common.py | 47 --- hub/tests/common.py | 46 +-- hub/tests/common_benchmark.py | 36 --- hub/tests/dataset_fixtures.py | 30 ++ hub/tests/path_fixtures.py | 99 ++++++ hub/tests/storage_fixtures.py | 34 +++ setup.cfg | 2 - 12 files changed, 300 insertions(+), 335 deletions(-) delete mode 100644 hub/core/tests/common.py delete mode 100644 hub/tests/common_benchmark.py create mode 100644 hub/tests/dataset_fixtures.py create mode 100644 hub/tests/path_fixtures.py create mode 100644 hub/tests/storage_fixtures.py diff --git a/conftest.py b/conftest.py index 4381a9993b..315edc52d8 100644 --- a/conftest.py +++ b/conftest.py @@ -10,59 +10,14 @@ os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2" logging.basicConfig(level=logging.ERROR) -from hub.api.dataset import Dataset -from hub.constants import ( - MIN_FIRST_CACHE_SIZE, - MIN_SECOND_CACHE_SIZE, - PYTEST_LOCAL_PROVIDER_BASE_ROOT, - PYTEST_MEMORY_PROVIDER_BASE_ROOT, - PYTEST_S3_PROVIDER_BASE_ROOT, -) -from hub.core.storage import LocalProvider, MemoryProvider, S3Provider -from hub.core.tests.common import LOCAL, MEMORY, S3 -from hub.core.typing import StorageProvider -from hub.tests.common import SESSION_ID, current_test_name, get_dummy_data_path +from hub.constants import * +from hub.tests.common import SESSION_ID from hub.util.cache_chain import get_cache_chain -MEMORY_OPT = "--memory-skip" -LOCAL_OPT = "--local" -S3_OPT = "--s3" -CACHE_OPT = "--cache-chains" -CACHE_ONLY_OPT = "--cache-chains-only" -S3_PATH_OPT = "--s3-path" -KEEP_STORAGE_OPT = "--keep-storage" -FULL_BENCHMARK_OPT = "--full-benchmarks" - -# @pytest.mark.`FULL_BENCHMARK_MARK` is how full benchmarks are notated -FULL_BENCHMARK_MARK = "full_benchmark" - - -def _get_storage_configs(request): - return { - MEMORY: { - "base_root": PYTEST_MEMORY_PROVIDER_BASE_ROOT, - "class": MemoryProvider, - "use_id": False, - "is_id_prefix": False, - # if is_id_prefix (and use_id=True), the session id comes before test name, otherwise it is reversed - }, - LOCAL: { - "base_root": PYTEST_LOCAL_PROVIDER_BASE_ROOT, - "class": LocalProvider, - "use_id": False, - "is_id_prefix": False, - }, - S3: { - "base_root": request.config.getoption(S3_PATH_OPT), - "class": S3Provider, - "use_id": True, - "is_id_prefix": True, - }, - } - - -def _is_opt_true(request, opt): - return request.config.getoption(opt) +# import * so all fixtures can be used accross the project +from hub.tests.path_fixtures import * +from hub.tests.dataset_fixtures import * +from hub.tests.storage_fixtures import * def pytest_addoption(parser): @@ -101,12 +56,6 @@ def pytest_addoption(parser): help="Url to s3 bucket with optional key. Example: s3://bucket_name/key/to/tests/", default=PYTEST_S3_PROVIDER_BASE_ROOT, ) - parser.addoption( - FULL_BENCHMARK_OPT, - action="store_true", - help="Some benchmarks take a long time to run and by default should be skipped. This option enables them. " - "It also force enables `--cache-chains`.", - ) parser.addoption( KEEP_STORAGE_OPT, action="store_true", @@ -115,44 +64,6 @@ def pytest_addoption(parser): ) -def _get_storage_provider( - request, storage_name, with_current_test_name=True, info_override={} -): - info = _get_storage_configs(request)[storage_name] - info.update(info_override) - - root = info["base_root"] - - path = "" - if with_current_test_name: - path = current_test_name() - - if info["use_id"]: - if info["is_id_prefix"]: - path = posixpath.join(SESSION_ID, path) - else: - path = posixpath.join(path, SESSION_ID) - - root = posixpath.join(root, path) - return info["class"](root) - - -def _get_memory_provider(request): - return _get_storage_provider(request, MEMORY) - - -def _get_local_provider(request): - return _get_storage_provider(request, LOCAL) - - -def _get_s3_provider(request): - return _get_storage_provider(request, S3) - - -def _get_dataset(storage: StorageProvider): - return Dataset(storage=storage) - - @pytest.fixture def marks(request): """Fixture that gets all `@pytest.mark`s. If a test is marked with @@ -211,62 +122,50 @@ def _storage_from_request(request): return get_cache_chain(storage_providers, cache_sizes) -@pytest.fixture -def memory_storage(request): - if not _is_opt_true(request, MEMORY_OPT): - return _get_memory_provider(request) - pytest.skip() - - -@pytest.fixture -def local_storage(request): - if _is_opt_true(request, LOCAL_OPT): - return _get_local_provider(request) - pytest.skip() - - -@pytest.fixture -def s3_storage(request): - if _is_opt_true(request, S3_OPT): - return _get_s3_provider(request) - pytest.skip() - - -@pytest.fixture -def storage(request): - return _storage_from_request(request) - - -@pytest.fixture -def memory_ds(memory_storage): - return _get_dataset(memory_storage) - - -@pytest.fixture -def local_ds(local_storage): - return _get_dataset(local_storage) - - -@pytest.fixture -def s3_ds(s3_storage): - return _get_dataset(s3_storage) - - -@pytest.fixture -def ds(request): - return _get_dataset(_storage_from_request(request)) - - -@pytest.fixture -def cat_path(): - path = get_dummy_data_path("compressed_images") - return os.path.join(path, "cat.jpeg") - - -@pytest.fixture -def flower_path(): - path = get_dummy_data_path("compressed_images") - return os.path.join(path, "flower.png") +# @pytest.fixture +# def memory_storage(request): +# if not _is_opt_true(request, MEMORY_OPT): +# return _get_memory_provider(request) +# pytest.skip() +# +# +# @pytest.fixture +# def local_storage(request): +# if _is_opt_true(request, LOCAL_OPT): +# return _get_local_provider(request) +# pytest.skip() +# +# +# @pytest.fixture +# def s3_storage(request): +# if _is_opt_true(request, S3_OPT): +# return _get_s3_provider(request) +# pytest.skip() +# +# +# @pytest.fixture +# def storage(request): +# return _storage_from_request(request) +# +# +# @pytest.fixture +# def memory_ds(memory_storage): +# return _get_dataset(memory_storage) +# +# +# @pytest.fixture +# def local_ds(local_storage): +# return _get_dataset(local_storage) +# +# +# @pytest.fixture +# def s3_ds(s3_storage): +# return _get_dataset(s3_storage) +# +# +# @pytest.fixture +# def ds(request): +# return _get_dataset(_storage_from_request(request)) def print_session_id(): @@ -278,50 +177,41 @@ def print_session_id(): print_session_id() -def _clear_storages(request): - # clear memory - if not _is_opt_true(request, MEMORY_OPT): - storage = _get_storage_provider(request, MEMORY, with_current_test_name=False) - storage.clear() - - # clear local - if _is_opt_true(request, LOCAL_OPT): - storage = _get_storage_provider(request, LOCAL, with_current_test_name=False) - storage.clear() - - # clear s3 - if _is_opt_true(request, S3_OPT): - storage = _get_storage_provider(request, S3, with_current_test_name=False) - storage.clear() - - -@pytest.fixture(scope="session", autouse=True) -def clear_storages_session(request): - # executed before the first test - _clear_storages(request) - - yield - - # executed after the last test - print_session_id() - - -@pytest.fixture(scope="function", autouse=True) -def clear_storages_function(request): - # executed before the current test - - yield - - # executed after the current test - if not _is_opt_true(request, KEEP_STORAGE_OPT): - _clear_storages(request) - - -@pytest.fixture(scope="function", autouse=True) -def skip_if_full_benchmarks_disabled(request, marks): - if _is_opt_true(request, FULL_BENCHMARK_OPT): - # don't skip anything if `FULL_BENCHMARK_OPT` is provided - return - - if FULL_BENCHMARK_MARK in marks: - pytest.skip() +# def _clear_storages(request): +# # clear memory +# if not _is_opt_true(request, MEMORY_OPT): +# storage = _get_storage_provider(request, MEMORY, with_current_test_name=False) +# storage.clear() +# +# # clear local +# if _is_opt_true(request, LOCAL_OPT): +# storage = _get_storage_provider(request, LOCAL, with_current_test_name=False) +# storage.clear() +# +# # clear s3 +# if _is_opt_true(request, S3_OPT): +# storage = _get_storage_provider(request, S3, with_current_test_name=False) +# storage.clear() +# +# +# @pytest.fixture(scope="session", autouse=True) +# def clear_storages_session(request): +# # executed before the first test +# _clear_storages(request) +# +# yield +# +# # executed after the last test +# print_session_id() +# +# +# @pytest.fixture(scope="function", autouse=True) +# def clear_storages_function(request): +# # executed before the current test +# +# yield +# +# # executed after the current test +# if not _is_opt_true(request, KEEP_STORAGE_OPT): +# _clear_storages(request) +# diff --git a/hub/api/tests/test_api.py b/hub/api/tests/test_api.py index 6530362969..76e318b454 100644 --- a/hub/api/tests/test_api.py +++ b/hub/api/tests/test_api.py @@ -5,12 +5,12 @@ import hub import os from hub.api.dataset import Dataset -from hub.core.tests.common import parametrize_all_dataset_storages from hub.tests.common import assert_array_lists_equal from hub.util.exceptions import TensorDtypeMismatchError, TensorInvalidSampleShapeError from hub.client.client import HubBackendClient from hub.client.utils import has_hub_testing_creds from click.testing import CliRunner +from hub.tests.dataset_fixtures import all_enabled_datasets # need this for 32-bit and 64-bit systems to have correct tests @@ -72,7 +72,7 @@ def test_persist_local_clear_cache(local_storage): ds.delete() -@parametrize_all_dataset_storages +@all_enabled_datasets def test_populate_dataset(ds): assert ds.meta.tensors == [] ds.create_tensor("image") @@ -128,7 +128,7 @@ def test_stringify_with_path(local_ds): assert str(ds) == f"Dataset(path='{local_ds.path}', tensors=[])" -@parametrize_all_dataset_storages +@all_enabled_datasets def test_compute_fixed_tensor(ds): ds.create_tensor("image") ds.image.extend(np.ones((32, 28, 28))) @@ -136,7 +136,7 @@ def test_compute_fixed_tensor(ds): np.testing.assert_array_equal(ds.image.numpy(), np.ones((32, 28, 28))) -@parametrize_all_dataset_storages +@all_enabled_datasets def test_compute_dynamic_tensor(ds): ds.create_tensor("image") @@ -168,7 +168,7 @@ def test_compute_dynamic_tensor(ds): assert image.is_dynamic -@parametrize_all_dataset_storages +@all_enabled_datasets def test_empty_samples(ds: Dataset): tensor = ds.create_tensor("with_empty") @@ -200,7 +200,7 @@ def test_empty_samples(ds: Dataset): np.testing.assert_array_equal(actual, expected) -@parametrize_all_dataset_storages +@all_enabled_datasets def test_scalar_samples(ds: Dataset): tensor = ds.create_tensor("scalars") @@ -251,7 +251,7 @@ def test_scalar_samples(ds: Dataset): tensor.append([1, 2]) -@parametrize_all_dataset_storages +@all_enabled_datasets def test_sequence_samples(ds: Dataset): tensor = ds.create_tensor("arrays") @@ -267,7 +267,7 @@ def test_sequence_samples(ds: Dataset): assert_array_lists_equal(tensor.numpy(aslist=True), expected) -@parametrize_all_dataset_storages +@all_enabled_datasets def test_iterate_dataset(ds): labels = [1, 9, 7, 4] ds.create_tensor("image") diff --git a/hub/api/tests/test_api_with_compression.py b/hub/api/tests/test_api_with_compression.py index d22416e4b1..501427d48d 100644 --- a/hub/api/tests/test_api_with_compression.py +++ b/hub/api/tests/test_api_with_compression.py @@ -2,12 +2,12 @@ import pytest from hub.api.tensor import Tensor from hub.tests.common import TENSOR_KEY +from hub.tests.dataset_fixtures import all_enabled_datasets from hub.constants import UNCOMPRESSED import numpy as np import hub from hub import Dataset -from hub.core.tests.common import parametrize_all_dataset_storages def _populate_compressed_samples(tensor: Tensor, cat_path, flower_path, count=1): @@ -34,7 +34,7 @@ def _populate_compressed_samples(tensor: Tensor, cat_path, flower_path, count=1) return original_compressions -@parametrize_all_dataset_storages +@all_enabled_datasets def test_populate_compressed_samples(ds: Dataset, cat_path, flower_path): images = ds.create_tensor(TENSOR_KEY, htype="image") @@ -53,7 +53,7 @@ def test_populate_compressed_samples(ds: Dataset, cat_path, flower_path): assert images.shape_interval.upper == (5, 900, 900, 4) -@parametrize_all_dataset_storages +@all_enabled_datasets def test_iterate_compressed_samples(ds: Dataset, cat_path, flower_path): images = ds.create_tensor(TENSOR_KEY, htype="image") @@ -81,7 +81,7 @@ def test_iterate_compressed_samples(ds: Dataset, cat_path, flower_path): assert x.shape == expected_shape -@parametrize_all_dataset_storages +@all_enabled_datasets def test_uncompressed(ds: Dataset): images = ds.create_tensor(TENSOR_KEY, sample_compression=UNCOMPRESSED) diff --git a/hub/constants.py b/hub/constants.py index 3e3591a47f..55e327b2ce 100644 --- a/hub/constants.py +++ b/hub/constants.py @@ -49,3 +49,13 @@ PYTEST_MEMORY_PROVIDER_BASE_ROOT = "mem://hub_pytest" PYTEST_LOCAL_PROVIDER_BASE_ROOT = "/tmp/hub_pytest/" # TODO: may fail for windows PYTEST_S3_PROVIDER_BASE_ROOT = "s3://hub-2.0-tests/" + + +# pytest options +MEMORY_OPT = "--memory-skip" +LOCAL_OPT = "--local" +S3_OPT = "--s3" +S3_PATH_OPT = "--s3-path" +CACHE_OPT = "--cache-chains" +CACHE_ONLY_OPT = "--cache-chains-only" +KEEP_STORAGE_OPT = "--keep-storage" diff --git a/hub/core/storage/tests/test_storage_provider.py b/hub/core/storage/tests/test_storage_provider.py index e4931edf00..d3ed38dbf5 100644 --- a/hub/core/storage/tests/test_storage_provider.py +++ b/hub/core/storage/tests/test_storage_provider.py @@ -1,12 +1,31 @@ +from hub.client.config import LOCAL +from hub.tests.path_fixtures import ALL_STORAGES, MEMORY, S3 import pytest from click.testing import CliRunner from hub.constants import MB -from hub.core.tests.common import parametrize_all_caches, parametrize_all_storages import pickle + KEY = "file" +STORAGE_FIXTURE_NAME = "storage" + + +_ALL_CACHES_TUPLES = [(MEMORY, LOCAL), (MEMORY, S3), (LOCAL, S3), (MEMORY, LOCAL, S3)] +ALL_CACHES = list(map(lambda i: ",".join(i), _ALL_CACHES_TUPLES)) + +parametrize_all_storages = pytest.mark.parametrize( + STORAGE_FIXTURE_NAME, + ALL_STORAGES, + indirect=True, +) + +parametrize_all_caches = pytest.mark.parametrize( + STORAGE_FIXTURE_NAME, + ALL_CACHES, + indirect=True, +) # helper functions for tests def check_storage_provider(storage): diff --git a/hub/core/tests/common.py b/hub/core/tests/common.py deleted file mode 100644 index 28a4553ad8..0000000000 --- a/hub/core/tests/common.py +++ /dev/null @@ -1,47 +0,0 @@ -import pytest - -from typing import Dict, List - -import pytest - -from hub.core.typing import StorageProvider - -STORAGE_FIXTURE_NAME = "storage" -DATASET_FIXTURE_NAME = "ds" - -MEMORY = "memory" -LOCAL = "local" -S3 = "s3" - -ALL_PROVIDERS = [MEMORY, LOCAL, S3] - -_ALL_CACHES_TUPLES = [(MEMORY, LOCAL), (MEMORY, S3), (LOCAL, S3), (MEMORY, LOCAL, S3)] -ALL_CACHES = list(map(lambda i: ",".join(i), _ALL_CACHES_TUPLES)) - -parametrize_all_storages = pytest.mark.parametrize( - STORAGE_FIXTURE_NAME, - ALL_PROVIDERS, - indirect=True, -) - -parametrize_all_caches = pytest.mark.parametrize( - STORAGE_FIXTURE_NAME, - ALL_CACHES, - indirect=True, -) - -parametrize_all_storages_and_caches = pytest.mark.parametrize( - STORAGE_FIXTURE_NAME, - ALL_PROVIDERS + ALL_CACHES, - indirect=True, -) - -parametrize_all_dataset_storages = pytest.mark.parametrize( - DATASET_FIXTURE_NAME, ALL_PROVIDERS, indirect=True -) - -parametrize_all_dataset_storages_and_caches = pytest.mark.parametrize( - STORAGE_FIXTURE_NAME, - ALL_PROVIDERS + ALL_CACHES, # type: ignore - indirect=True, -) diff --git a/hub/tests/common.py b/hub/tests/common.py index 1aa7742e11..0306ca3b22 100644 --- a/hub/tests/common.py +++ b/hub/tests/common.py @@ -1,16 +1,15 @@ from PIL import Image, UnidentifiedImageError # type: ignore from io import BytesIO -from hub.api.tensor import Tensor import os import pathlib -from typing import Sequence, Tuple, List +from typing import Tuple, List from uuid import uuid1 import numpy as np import posixpath import pytest -from hub.constants import KB, MB, UNCOMPRESSED, USE_UNIFORM_COMPRESSION_PER_SAMPLE +from hub.constants import KB, MB, UNCOMPRESSED SESSION_ID = str(uuid1()) @@ -54,42 +53,6 @@ def get_dummy_data_path(subpath: str = ""): return os.path.join(_THIS_FILE, "dummy_data" + os.sep, subpath) -def get_random_array(shape: Tuple[int], dtype: str) -> np.ndarray: - dtype = dtype.lower() - - if "int" in dtype: - low = np.iinfo(dtype).min - high = np.iinfo(dtype).max - return np.random.randint(low=low, high=high, size=shape, dtype=dtype) - - if "float" in dtype: - # `low`/`high` have to be `float16` instead of `dtype` because `np.random.uniform` only supports `float16` - low = np.finfo("float16").min - high = np.finfo("float16").max - return np.random.uniform(low=low, high=high, size=shape).astype(dtype) - - if "bool" in dtype: - a = np.random.uniform(size=shape) - return a > 0.5 - - raise ValueError(f"Dtype '{dtype}' not supported.") - - -@parametrize_dtypes -@pytest.mark.parametrize( - SHAPE_PARAM, - ( - (100, 100), - (1,), - (1, 1, 1, 1, 1), - ), -) -def test_get_random_array(shape: Tuple[int], dtype: str): - array = get_random_array(shape, dtype) - assert array.shape == shape - assert array.dtype == dtype - - def get_actual_compression_from_buffer(buffer: memoryview) -> str: """Helpful for checking if actual compression matches expected.""" @@ -107,3 +70,8 @@ def assert_array_lists_equal(l1: List[np.ndarray], l2: List[np.ndarray]): """Assert that two lists of numpy arrays are equal""" for idx, (a1, a2) in enumerate(zip(l1, l2)): np.testing.assert_array_equal(a1, a2, err_msg=f"Array mismatch at index {idx}") + + +def is_opt_true(request, opt) -> bool: + """Returns if the pytest option `opt` is True. Assumes `opt` is a boolean value.""" + return request.config.getoption(opt) diff --git a/hub/tests/common_benchmark.py b/hub/tests/common_benchmark.py deleted file mode 100644 index 10c020abb7..0000000000 --- a/hub/tests/common_benchmark.py +++ /dev/null @@ -1,36 +0,0 @@ -import pytest - -from hub.constants import MB -from hub.tests.common import ( - CHUNK_SIZE_PARAM, - SHAPE_PARAM, - NUM_BATCHES_PARAM, - DTYPE_PARAM, -) - -# benchmark parametrizations -BENCHMARK_NUM_BATCHES = (1,) -BENCHMARK_DTYPES = ("int64",) -BENCHMARK_CHUNK_SIZES = (16 * MB,) - -FULL_BENCHMARK_BATCHED_SHAPES = ( - # with int64/float64 = ~1GB - (840, 224, 224, 3), -) - -BENCHMARK_BATCHED_SHAPES = ((5, 224, 224, 3),) - -# benchmark parametrize decorators -parametrize_benchmark_chunk_sizes = pytest.mark.parametrize( - CHUNK_SIZE_PARAM, BENCHMARK_CHUNK_SIZES -) -parametrize_benchmark_dtypes = pytest.mark.parametrize(DTYPE_PARAM, BENCHMARK_DTYPES) -parametrize_benchmark_shapes = pytest.mark.parametrize( - SHAPE_PARAM, BENCHMARK_BATCHED_SHAPES -) -parametrize_full_benchmark_shapes = pytest.mark.parametrize( - SHAPE_PARAM, FULL_BENCHMARK_BATCHED_SHAPES -) -parametrize_benchmark_num_batches = pytest.mark.parametrize( - NUM_BATCHES_PARAM, BENCHMARK_NUM_BATCHES -) diff --git a/hub/tests/dataset_fixtures.py b/hub/tests/dataset_fixtures.py new file mode 100644 index 0000000000..1b2aab2761 --- /dev/null +++ b/hub/tests/dataset_fixtures.py @@ -0,0 +1,30 @@ +import pytest +from hub import Dataset + + +all_enabled_datasets = pytest.mark.parametrize( + "ds", + ["memory_ds", "local_ds", "s3_ds"], + indirect=True, +) + + +@pytest.fixture +def memory_ds(memory_path): + return Dataset(memory_path) + + +@pytest.fixture +def local_ds(local_path): + return Dataset(local_path) + + +@pytest.fixture +def s3_ds(s3_path): + return Dataset(s3_path) + + +@pytest.fixture +def ds(request): + """Used with parametrize to use all enabled dataset fixtures.""" + return request.getfixturevalue(request.param) diff --git a/hub/tests/path_fixtures.py b/hub/tests/path_fixtures.py new file mode 100644 index 0000000000..3835e6bd3a --- /dev/null +++ b/hub/tests/path_fixtures.py @@ -0,0 +1,99 @@ +import os +from conftest import S3_PATH_OPT +from hub.constants import ( + LOCAL_OPT, + MEMORY_OPT, + PYTEST_LOCAL_PROVIDER_BASE_ROOT, + PYTEST_MEMORY_PROVIDER_BASE_ROOT, + S3_OPT, +) +import posixpath +from hub.tests.common import ( + SESSION_ID, + current_test_name, + get_dummy_data_path, + is_opt_true, +) +import pytest + +MEMORY = "memory" +LOCAL = "local" +S3 = "s3" + +ALL_STORAGES = [MEMORY, LOCAL, S3] + + +def _get_path_composition_configs(request): + return { + MEMORY: { + "base_root": PYTEST_MEMORY_PROVIDER_BASE_ROOT, + "use_id": False, + "is_id_prefix": False, + # if is_id_prefix (and use_id=True), the session id comes before test name, otherwise it is reversed + }, + LOCAL: { + "base_root": PYTEST_LOCAL_PROVIDER_BASE_ROOT, + "use_id": False, + "is_id_prefix": False, + }, + S3: { + "base_root": request.config.getoption(S3_PATH_OPT), + "use_id": True, + "is_id_prefix": True, + }, + } + + +def _get_storage_path( + request, storage_name, with_current_test_name=True, info_override={} +): + info = _get_path_composition_configs(request)[storage_name] + info.update(info_override) + + root = info["base_root"] + + path = "" + if with_current_test_name: + path = current_test_name() + + if info["use_id"]: + if info["is_id_prefix"]: + path = posixpath.join(SESSION_ID, path) + else: + path = posixpath.join(path, SESSION_ID) + + root = posixpath.join(root, path) + return root + + +@pytest.fixture +def memory_path(request): + if not is_opt_true(request, MEMORY_OPT): + return _get_storage_path(request, MEMORY) + pytest.skip() + + +@pytest.fixture +def local_path(request): + if is_opt_true(request, LOCAL_OPT): + return _get_storage_path(request, LOCAL) + pytest.skip() + + +@pytest.fixture +def s3_path(request): + if is_opt_true(request, S3_OPT): + return _get_storage_path(request, S3) + pytest.skip() + + +@pytest.fixture +def cat_path(): + path = get_dummy_data_path("compressed_images") + return os.path.join(path, "cat.jpeg") + + +@pytest.fixture +def flower_path(): + path = get_dummy_data_path("compressed_images") + return os.path.join(path, "flower.png") diff --git a/hub/tests/storage_fixtures.py b/hub/tests/storage_fixtures.py new file mode 100644 index 0000000000..056a2db5ef --- /dev/null +++ b/hub/tests/storage_fixtures.py @@ -0,0 +1,34 @@ +from hub.core.storage.s3 import S3Provider +from hub.core.storage.local import LocalProvider +from hub.core.storage.memory import MemoryProvider +from hub.api.dataset import Dataset +from hub.core.storage.provider import StorageProvider +import pytest + + +all_enabled_storages = pytest.mark.parametrize( + "storage", + ["memory_storage", "local_storage", "s3_storage"], + indirect=True, +) + + +@pytest.fixture +def memory_storage(memory_path): + return MemoryProvider(memory_path) + + +@pytest.fixture +def local_storage(local_path): + return LocalProvider(local_path) + + +@pytest.fixture +def s3_storage(s3_path): + return S3Provider(s3_path) + + +@pytest.fixture +def storage(request): + """Used with parametrize to use all enabled storage fixtures.""" + return request.getfixturevalue(request.param) diff --git a/setup.cfg b/setup.cfg index dacc7f70d9..8de5c249a9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,7 +1,5 @@ [tool:pytest] addopts = -s -markers = - full_benchmark: mark a benchmark that will take a long time as "full". in order to run this benchmark, the `--full-benchmarks` option is required. [mypy] warn_redundant_casts = True From a23e28f8f802dabd78fc547aeeaddc39c81534f3 Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Mon, 12 Jul 2021 17:05:01 -0700 Subject: [PATCH 02/33] local ds generator --- hub/api/tests/test_api.py | 35 ++++++++++++++++++----------------- hub/tests/dataset_fixtures.py | 8 ++++++++ hub/tests/path_fixtures.py | 33 ++++++++++++++++++++++++++++----- 3 files changed, 54 insertions(+), 22 deletions(-) diff --git a/hub/api/tests/test_api.py b/hub/api/tests/test_api.py index 76e318b454..f000d08b33 100644 --- a/hub/api/tests/test_api.py +++ b/hub/api/tests/test_api.py @@ -18,12 +18,13 @@ MAX_FLOAT_DTYPE = np.float_.__name__ -def test_persist_local(local_storage): - ds = Dataset(local_storage.root, local_cache_size=512) +def test_persist_local(local_ds_generator): + ds = local_ds_generator() + ds.create_tensor("image") ds.image.extend(np.ones((4, 224, 224, 3))) - ds_new = Dataset(local_storage.root) + ds_new = local_ds_generator() assert len(ds_new) == 4 assert ds_new.image.shape == (4, 224, 224, 3) @@ -34,15 +35,15 @@ def test_persist_local(local_storage): ds.delete() -def test_persist_with_local(local_storage): - with Dataset(local_storage.root, local_cache_size=512) as ds: +def test_persist_with_local(local_ds_generator): + with local_ds_generator() as ds: ds.create_tensor("image") ds.image.extend(np.ones((4, 224, 224, 3))) - ds_new = Dataset(local_storage.root) + ds_new = local_ds_generator() assert len(ds_new) == 0 # shouldn't be flushed yet - ds_new = Dataset(local_storage.root) + ds_new = local_ds_generator() assert len(ds_new) == 4 engine = ds_new.image.chunk_engine @@ -58,12 +59,12 @@ def test_persist_with_local(local_storage): ds.delete() -def test_persist_local_clear_cache(local_storage): - ds = Dataset(local_storage.root, local_cache_size=512) +def test_persist_local_clear_cache(local_ds_generator): + ds = local_ds_generator() ds.create_tensor("image") ds.image.extend(np.ones((4, 224, 224, 3))) ds.clear_cache() - ds_new = Dataset(local_storage.root) + ds_new = local_ds_generator() assert len(ds_new) == 4 assert ds_new.image.shape == (4, 224, 224, 3) @@ -97,13 +98,12 @@ def test_populate_dataset(ds): @pytest.mark.xfail(raises=NotImplementedError, strict=True) -def test_larger_data_memory(memory_storage): - ds = Dataset(memory_storage.root) - ds.create_tensor("image") - ds.image.extend(np.ones((4, 4096, 4096))) - assert len(ds) == 4 - assert ds.image.shape == (4, 4096, 4096) - np.testing.assert_array_equal(ds.image.numpy(), np.ones((4, 4096, 4096))) +def test_larger_data_memory(memory_ds): + memory_ds.create_tensor("image") + memory_ds.image.extend(np.ones((4, 4096, 4096))) + assert len(memory_ds) == 4 + assert memory_ds.image.shape == (4, 4096, 4096) + np.testing.assert_array_equal(memory_ds.image.numpy(), np.ones((4, 4096, 4096))) def test_stringify(memory_ds): @@ -446,6 +446,7 @@ def test_fails_on_wrong_tensor_syntax(memory_ds): @pytest.mark.skipif(not has_hub_testing_creds(), reason="requires hub credentials") def test_hub_cloud_dataset(): + # TODO: turn this into a fixture and remove skipif username = "testingacc" password = os.getenv("ACTIVELOOP_HUB_PASSWORD") diff --git a/hub/tests/dataset_fixtures.py b/hub/tests/dataset_fixtures.py index 1b2aab2761..97df4289e9 100644 --- a/hub/tests/dataset_fixtures.py +++ b/hub/tests/dataset_fixtures.py @@ -19,6 +19,14 @@ def local_ds(local_path): return Dataset(local_path) +@pytest.fixture +def local_ds_generator(local_path): + def generate_local_ds(): + return Dataset(local_path) + + return generate_local_ds + + @pytest.fixture def s3_ds(s3_path): return Dataset(s3_path) diff --git a/hub/tests/path_fixtures.py b/hub/tests/path_fixtures.py index 3835e6bd3a..cacd558a97 100644 --- a/hub/tests/path_fixtures.py +++ b/hub/tests/path_fixtures.py @@ -1,6 +1,9 @@ +from hub.core.storage.s3 import S3Provider +from hub.core.storage.local import LocalProvider import os from conftest import S3_PATH_OPT from hub.constants import ( + KEEP_STORAGE_OPT, LOCAL_OPT, MEMORY_OPT, PYTEST_LOCAL_PROVIDER_BASE_ROOT, @@ -69,31 +72,51 @@ def _get_storage_path( @pytest.fixture def memory_path(request): if not is_opt_true(request, MEMORY_OPT): + # no need to clear memory paths return _get_storage_path(request, MEMORY) - pytest.skip() + else: + pytest.skip() @pytest.fixture def local_path(request): if is_opt_true(request, LOCAL_OPT): - return _get_storage_path(request, LOCAL) - pytest.skip() + path = _get_storage_path(request, LOCAL) + + yield path + + # clear storage unless flagged otherwise + if not is_opt_true(request, KEEP_STORAGE_OPT): + LocalProvider(path).clear() + else: + pytest.skip() @pytest.fixture def s3_path(request): if is_opt_true(request, S3_OPT): - return _get_storage_path(request, S3) - pytest.skip() + path = _get_storage_path(request, S3) + + yield path + + # clear storage unless flagged otherwise + if not is_opt_true(request, KEEP_STORAGE_OPT): + S3Provider(path).clear() + else: + pytest.skip() @pytest.fixture def cat_path(): + """Path to a cat image in the dummy data folder.""" + path = get_dummy_data_path("compressed_images") return os.path.join(path, "cat.jpeg") @pytest.fixture def flower_path(): + """Path to a flower image in the dummy data folder.""" + path = get_dummy_data_path("compressed_images") return os.path.join(path, "flower.png") From 63a5056a30cfd1b0f3e94e3375a9ed3f489661e0 Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Mon, 12 Jul 2021 17:44:14 -0700 Subject: [PATCH 03/33] start setting up hub cloud credentials --- conftest.py | 15 ++--- hub/api/tests/test_api.py | 14 ++--- hub/cli/test_cli.py | 1 - hub/client/client.py | 5 +- hub/client/config.py | 4 +- hub/client/test_client.py | 1 - hub/client/utils.py | 6 -- hub/constants.py | 1 + .../storage/tests/test_storage_provider.py | 3 +- hub/tests/client_fixtures.py | 28 +++++++++ hub/tests/dataset_fixtures.py | 8 ++- hub/tests/path_fixtures.py | 62 +++++++++++++------ 12 files changed, 93 insertions(+), 55 deletions(-) create mode 100644 hub/tests/client_fixtures.py diff --git a/conftest.py b/conftest.py index 315edc52d8..52258fe065 100644 --- a/conftest.py +++ b/conftest.py @@ -18,24 +18,19 @@ from hub.tests.path_fixtures import * from hub.tests.dataset_fixtures import * from hub.tests.storage_fixtures import * +from hub.tests.client_fixtures import * def pytest_addoption(parser): parser.addoption( - MEMORY_OPT, - action="store_true", - help="Tests using the `memory_storage` fixture will be skipped. Tests using the `storage` fixture will be " - "skipped if called with `MemoryProvider`.", + MEMORY_OPT, action="store_true", help="Memory tests will be SKIPPED if enabled." ) parser.addoption( - LOCAL_OPT, - action="store_true", - help="Tests using the `storage`/`local_storage` fixtures will run with `LocalProvider`.", + LOCAL_OPT, action="store_true", help="Local tests will run if enabled." ) + parser.addoption(S3_OPT, action="store_true", help="S3 tests will run if enabled.") parser.addoption( - S3_OPT, - action="store_true", - help="Tests using the `storage`/`s3_storage` fixtures will run with `S3Provider`.", + HUB_CLOUD_OPT, action="store_true", help="Hub cloud tests will run if enabled." ) parser.addoption( CACHE_OPT, diff --git a/hub/api/tests/test_api.py b/hub/api/tests/test_api.py index f000d08b33..d7b6dfe696 100644 --- a/hub/api/tests/test_api.py +++ b/hub/api/tests/test_api.py @@ -8,7 +8,6 @@ from hub.tests.common import assert_array_lists_equal from hub.util.exceptions import TensorDtypeMismatchError, TensorInvalidSampleShapeError from hub.client.client import HubBackendClient -from hub.client.utils import has_hub_testing_creds from click.testing import CliRunner from hub.tests.dataset_fixtures import all_enabled_datasets @@ -444,16 +443,11 @@ def test_fails_on_wrong_tensor_syntax(memory_ds): memory_ds.some_tensor = np.ones((28, 28)) -@pytest.mark.skipif(not has_hub_testing_creds(), reason="requires hub credentials") -def test_hub_cloud_dataset(): - # TODO: turn this into a fixture and remove skipif - username = "testingacc" - password = os.getenv("ACTIVELOOP_HUB_PASSWORD") +def test_hub_cloud_dataset(hub_cloud_ds): + # TODO: remove this and run all tests on hub cloud + + ds = hub_cloud_ds - client = HubBackendClient() - token = client.request_auth_token(username, password) - id = str(uuid.uuid1()) - ds = Dataset(f"hub://testingacc/hub2ds2_{id}", token=token) ds.create_tensor("image") for i in range(10): diff --git a/hub/cli/test_cli.py b/hub/cli/test_cli.py index daf121c7a9..f21e4a07ea 100644 --- a/hub/cli/test_cli.py +++ b/hub/cli/test_cli.py @@ -5,7 +5,6 @@ from click.testing import CliRunner from hub.cli.auth import login, logout -from hub.client.utils import has_hub_testing_creds @pytest.mark.skipif(not has_hub_testing_creds(), reason="requires hub credentials") diff --git a/hub/client/client.py b/hub/client/client.py index 2cba6a9383..28899eabe2 100644 --- a/hub/client/client.py +++ b/hub/client/client.py @@ -13,7 +13,6 @@ GET_DATASET_CREDENTIALS_SUFFIX, CREATE_DATASET_SUFFIX, DATASET_SUFFIX, - UPDATE_SUFFIX, ) from hub.client.log import logger import hub @@ -101,9 +100,9 @@ def request( return response def endpoint(self): - if hub.client.config.LOCAL: + if hub.client.config.USE_LOCAL_HOST: return HUB_REST_ENDPOINT_LOCAL - if hub.client.config.DEV: + if hub.client.config.USE_DEV_ENVIRONMENT: return HUB_REST_ENDPOINT_DEV return HUB_REST_ENDPOINT diff --git a/hub/client/config.py b/hub/client/config.py index 7d3e687eb8..31c505f85c 100644 --- a/hub/client/config.py +++ b/hub/client/config.py @@ -6,8 +6,8 @@ HUB_REST_ENDPOINT = "https://app.activeloop.ai" HUB_REST_ENDPOINT_DEV = "https://app.dev.activeloop.ai" HUB_REST_ENDPOINT_LOCAL = "http://localhost:5000" -LOCAL = False -DEV = False +USE_LOCAL_HOST = False +USE_DEV_ENVIRONMENT = False GET_TOKEN_SUFFIX = "/api/user/token" REGISTER_USER_SUFFIX = "/api/user/register" diff --git a/hub/client/test_client.py b/hub/client/test_client.py index 051af31689..84db9ac0be 100644 --- a/hub/client/test_client.py +++ b/hub/client/test_client.py @@ -7,7 +7,6 @@ write_token, read_token, remove_token, - has_hub_testing_creds, ) diff --git a/hub/client/utils.py b/hub/client/utils.py index 9c38c563cb..b173169ca7 100644 --- a/hub/client/utils.py +++ b/hub/client/utils.py @@ -48,12 +48,6 @@ def remove_token(): os.remove(TOKEN_FILE_PATH) -def has_hub_testing_creds(): - """Checks if credentials exists""" - env = os.getenv("ACTIVELOOP_HUB_PASSWORD") - return env is not None - - def check_response_status(response: requests.Response): """Check response status and throw corresponding exception on failure.""" code = response.status_code diff --git a/hub/constants.py b/hub/constants.py index 55e327b2ce..cd96f3e933 100644 --- a/hub/constants.py +++ b/hub/constants.py @@ -55,6 +55,7 @@ MEMORY_OPT = "--memory-skip" LOCAL_OPT = "--local" S3_OPT = "--s3" +HUB_CLOUD_OPT = "--hub-cloud" S3_PATH_OPT = "--s3-path" CACHE_OPT = "--cache-chains" CACHE_ONLY_OPT = "--cache-chains-only" diff --git a/hub/core/storage/tests/test_storage_provider.py b/hub/core/storage/tests/test_storage_provider.py index d3ed38dbf5..4bf45f4cd0 100644 --- a/hub/core/storage/tests/test_storage_provider.py +++ b/hub/core/storage/tests/test_storage_provider.py @@ -1,5 +1,4 @@ -from hub.client.config import LOCAL -from hub.tests.path_fixtures import ALL_STORAGES, MEMORY, S3 +from hub.tests.path_fixtures import ALL_STORAGES, MEMORY, S3, LOCAL import pytest from click.testing import CliRunner diff --git a/hub/tests/client_fixtures.py b/hub/tests/client_fixtures.py new file mode 100644 index 0000000000..4ae135aae5 --- /dev/null +++ b/hub/tests/client_fixtures.py @@ -0,0 +1,28 @@ +from hub.constants import HUB_CLOUD_OPT +from hub.tests.common import is_opt_true +import os +import pytest +from hub.client.client import HubBackendClient +import hub + + +@pytest.fixture(scope="session") +def hub_testing_credentials(request): + if not is_opt_true(request, HUB_CLOUD_OPT): + pytest.skip() + + hub.client.config.USE_DEV_ENVIRONMENT = True + + username = "testingacc" + password = os.getenv("ACTIVELOOP_HUB_PASSWORD") + return username, password + + +@pytest.fixture(scope="session") +def hub_testing_token(hub_testing_credentials): + username, password = hub_testing_credentials + + # TODO: use DEV environment + client = HubBackendClient() + token = client.request_auth_token(username, password) + return token diff --git a/hub/tests/dataset_fixtures.py b/hub/tests/dataset_fixtures.py index 97df4289e9..bcb9ac790e 100644 --- a/hub/tests/dataset_fixtures.py +++ b/hub/tests/dataset_fixtures.py @@ -1,10 +1,11 @@ +import os import pytest from hub import Dataset all_enabled_datasets = pytest.mark.parametrize( "ds", - ["memory_ds", "local_ds", "s3_ds"], + ["memory_ds", "local_ds", "s3_ds"], # TODO: add hub cloud indirect=True, ) @@ -32,6 +33,11 @@ def s3_ds(s3_path): return Dataset(s3_path) +@pytest.fixture +def hub_cloud_ds(hub_cloud_path, hub_testing_token): + return Dataset(hub_cloud_path, token=hub_testing_token) + + @pytest.fixture def ds(request): """Used with parametrize to use all enabled dataset fixtures.""" diff --git a/hub/tests/path_fixtures.py b/hub/tests/path_fixtures.py index cacd558a97..644f5f121b 100644 --- a/hub/tests/path_fixtures.py +++ b/hub/tests/path_fixtures.py @@ -1,8 +1,10 @@ +from hub.util.get_storage_provider import storage_provider_from_hub_path from hub.core.storage.s3 import S3Provider from hub.core.storage.local import LocalProvider import os from conftest import S3_PATH_OPT from hub.constants import ( + HUB_CLOUD_OPT, KEEP_STORAGE_OPT, LOCAL_OPT, MEMORY_OPT, @@ -22,6 +24,7 @@ MEMORY = "memory" LOCAL = "local" S3 = "s3" +HUB_CLOUD = "hub_cloud" ALL_STORAGES = [MEMORY, LOCAL, S3] @@ -71,39 +74,60 @@ def _get_storage_path( @pytest.fixture def memory_path(request): - if not is_opt_true(request, MEMORY_OPT): - # no need to clear memory paths - return _get_storage_path(request, MEMORY) - else: + if is_opt_true(request, MEMORY_OPT): pytest.skip() + return + + # no need to clear memory paths + return _get_storage_path(request, MEMORY) @pytest.fixture def local_path(request): - if is_opt_true(request, LOCAL_OPT): - path = _get_storage_path(request, LOCAL) + if not is_opt_true(request, LOCAL_OPT): + pytest.skip() + return - yield path + path = _get_storage_path(request, LOCAL) - # clear storage unless flagged otherwise - if not is_opt_true(request, KEEP_STORAGE_OPT): - LocalProvider(path).clear() - else: - pytest.skip() + yield path + + # clear storage unless flagged otherwise + if not is_opt_true(request, KEEP_STORAGE_OPT): + LocalProvider(path).clear() @pytest.fixture def s3_path(request): - if is_opt_true(request, S3_OPT): - path = _get_storage_path(request, S3) + if not is_opt_true(request, S3_OPT): + pytest.skip() + return + + path = _get_storage_path(request, S3) + + yield path - yield path + # clear storage unless flagged otherwise + if not is_opt_true(request, KEEP_STORAGE_OPT): + S3Provider(path).clear() - # clear storage unless flagged otherwise - if not is_opt_true(request, KEEP_STORAGE_OPT): - S3Provider(path).clear() - else: + +@pytest.fixture +def hub_cloud_path(request, hub_testing_token): + # TODO: can probably generalize these fixtures + + # TODO: skipif + if not is_opt_true(request, HUB_CLOUD_OPT): pytest.skip() + return + + path = _get_storage_path(request, HUB_CLOUD) + + yield path + + # clear storage unless flagged otherwise + if not is_opt_true(request, KEEP_STORAGE_OPT): + storage_provider_from_hub_path(path, token=hub_testing_token).clear() @pytest.fixture From a1a4f059cc0ddaa925e0c500aec6071c226319a9 Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Mon, 12 Jul 2021 18:08:10 -0700 Subject: [PATCH 04/33] hub cloud fixtures work --- hub/api/tests/test_api.py | 8 ++++---- hub/constants.py | 4 ++++ hub/tests/client_fixtures.py | 15 +++++++-------- hub/tests/dataset_fixtures.py | 16 ++++++++++++---- hub/tests/path_fixtures.py | 13 +++++++++++++ 5 files changed, 40 insertions(+), 16 deletions(-) diff --git a/hub/api/tests/test_api.py b/hub/api/tests/test_api.py index d7b6dfe696..2088e7c668 100644 --- a/hub/api/tests/test_api.py +++ b/hub/api/tests/test_api.py @@ -443,19 +443,19 @@ def test_fails_on_wrong_tensor_syntax(memory_ds): memory_ds.some_tensor = np.ones((28, 28)) -def test_hub_cloud_dataset(hub_cloud_ds): +def test_hub_cloud_dataset(hub_cloud_ds_generator): # TODO: remove this and run all tests on hub cloud - ds = hub_cloud_ds + ds = hub_cloud_ds_generator() ds.create_tensor("image") for i in range(10): ds.image.append(i * np.ones((100, 100))) - token = ds.token del ds - ds = Dataset(f"hub://testingacc/hub2ds2_{id}", token=token) + + ds = hub_cloud_ds_generator() for i in range(10): np.testing.assert_array_equal(ds.image[i].numpy(), i * np.ones((100, 100))) diff --git a/hub/constants.py b/hub/constants.py index cd96f3e933..6bf7d87218 100644 --- a/hub/constants.py +++ b/hub/constants.py @@ -46,9 +46,13 @@ # caclulate the number of bits to shift right when converting a 128-bit uuid into `ENCODING_DTYPE` UUID_SHIFT_AMOUNT = 128 - (8 * ENCODING_DTYPE(1).itemsize) + +HUB_CLOUD_DEV_USERNAME = "testingacc" + PYTEST_MEMORY_PROVIDER_BASE_ROOT = "mem://hub_pytest" PYTEST_LOCAL_PROVIDER_BASE_ROOT = "/tmp/hub_pytest/" # TODO: may fail for windows PYTEST_S3_PROVIDER_BASE_ROOT = "s3://hub-2.0-tests/" +PYTEST_HUB_CLOUD_PROVIDER_BASE_ROOT = f"hub://{HUB_CLOUD_DEV_USERNAME}/" # pytest options diff --git a/hub/tests/client_fixtures.py b/hub/tests/client_fixtures.py index 4ae135aae5..96608b5bac 100644 --- a/hub/tests/client_fixtures.py +++ b/hub/tests/client_fixtures.py @@ -1,4 +1,4 @@ -from hub.constants import HUB_CLOUD_OPT +from hub.constants import HUB_CLOUD_DEV_USERNAME, HUB_CLOUD_OPT from hub.tests.common import is_opt_true import os import pytest @@ -7,22 +7,21 @@ @pytest.fixture(scope="session") -def hub_testing_credentials(request): +def hub_dev_credentials(request): + # TODO: skipif if not is_opt_true(request, HUB_CLOUD_OPT): pytest.skip() - hub.client.config.USE_DEV_ENVIRONMENT = True + # TODO: use dev environment - username = "testingacc" password = os.getenv("ACTIVELOOP_HUB_PASSWORD") - return username, password + return HUB_CLOUD_DEV_USERNAME, password @pytest.fixture(scope="session") -def hub_testing_token(hub_testing_credentials): - username, password = hub_testing_credentials +def hub_testing_token(hub_dev_credentials): + username, password = hub_dev_credentials - # TODO: use DEV environment client = HubBackendClient() token = client.request_auth_token(username, password) return token diff --git a/hub/tests/dataset_fixtures.py b/hub/tests/dataset_fixtures.py index bcb9ac790e..be2d5ea8b2 100644 --- a/hub/tests/dataset_fixtures.py +++ b/hub/tests/dataset_fixtures.py @@ -16,8 +16,8 @@ def memory_ds(memory_path): @pytest.fixture -def local_ds(local_path): - return Dataset(local_path) +def local_ds(local_ds_generator): + return local_ds_generator() @pytest.fixture @@ -34,8 +34,16 @@ def s3_ds(s3_path): @pytest.fixture -def hub_cloud_ds(hub_cloud_path, hub_testing_token): - return Dataset(hub_cloud_path, token=hub_testing_token) +def hub_cloud_ds(hub_cloud_ds_generator): + return hub_cloud_ds_generator() + + +@pytest.fixture +def hub_cloud_ds_generator(hub_cloud_path, hub_testing_token): + def generate_hub_cloud_ds(): + return Dataset(hub_cloud_path, token=hub_testing_token) + + return generate_hub_cloud_ds @pytest.fixture diff --git a/hub/tests/path_fixtures.py b/hub/tests/path_fixtures.py index 644f5f121b..2f158cfeaa 100644 --- a/hub/tests/path_fixtures.py +++ b/hub/tests/path_fixtures.py @@ -8,6 +8,7 @@ KEEP_STORAGE_OPT, LOCAL_OPT, MEMORY_OPT, + PYTEST_HUB_CLOUD_PROVIDER_BASE_ROOT, PYTEST_LOCAL_PROVIDER_BASE_ROOT, PYTEST_MEMORY_PROVIDER_BASE_ROOT, S3_OPT, @@ -36,16 +37,25 @@ def _get_path_composition_configs(request): "use_id": False, "is_id_prefix": False, # if is_id_prefix (and use_id=True), the session id comes before test name, otherwise it is reversed + "use_underscores": False, }, LOCAL: { "base_root": PYTEST_LOCAL_PROVIDER_BASE_ROOT, "use_id": False, "is_id_prefix": False, + "use_underscores": False, }, S3: { "base_root": request.config.getoption(S3_PATH_OPT), "use_id": True, "is_id_prefix": True, + "use_underscores": False, + }, + HUB_CLOUD: { + "base_root": PYTEST_HUB_CLOUD_PROVIDER_BASE_ROOT, + "use_id": True, + "is_id_prefix": True, + "use_underscores": True, }, } @@ -68,6 +78,9 @@ def _get_storage_path( else: path = posixpath.join(path, SESSION_ID) + if info["use_underscores"]: + path = path.replace("/", "_") + root = posixpath.join(root, path) return root From 669dab61ae971f31cd6234211b0346b32328d1cc Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Mon, 12 Jul 2021 18:15:44 -0700 Subject: [PATCH 05/33] persistent dataset tests --- hub/api/tests/test_api.py | 61 +++++++++------------- hub/api/tests/test_api_with_compression.py | 8 +-- hub/tests/dataset_fixtures.py | 28 ++++++++-- 3 files changed, 52 insertions(+), 45 deletions(-) diff --git a/hub/api/tests/test_api.py b/hub/api/tests/test_api.py index 2088e7c668..69cb64f88a 100644 --- a/hub/api/tests/test_api.py +++ b/hub/api/tests/test_api.py @@ -9,7 +9,10 @@ from hub.util.exceptions import TensorDtypeMismatchError, TensorInvalidSampleShapeError from hub.client.client import HubBackendClient from click.testing import CliRunner -from hub.tests.dataset_fixtures import all_enabled_datasets +from hub.tests.dataset_fixtures import ( + enabled_datasets, + enabled_persistent_dataset_generators, +) # need this for 32-bit and 64-bit systems to have correct tests @@ -17,13 +20,14 @@ MAX_FLOAT_DTYPE = np.float_.__name__ -def test_persist_local(local_ds_generator): - ds = local_ds_generator() +@enabled_persistent_dataset_generators +def test_persist(ds_generator): + ds = ds_generator() ds.create_tensor("image") ds.image.extend(np.ones((4, 224, 224, 3))) - ds_new = local_ds_generator() + ds_new = ds_generator() assert len(ds_new) == 4 assert ds_new.image.shape == (4, 224, 224, 3) @@ -34,15 +38,16 @@ def test_persist_local(local_ds_generator): ds.delete() -def test_persist_with_local(local_ds_generator): - with local_ds_generator() as ds: +@enabled_persistent_dataset_generators +def test_persist_with(ds_generator): + with ds_generator() as ds: ds.create_tensor("image") ds.image.extend(np.ones((4, 224, 224, 3))) - ds_new = local_ds_generator() + ds_new = ds_generator() assert len(ds_new) == 0 # shouldn't be flushed yet - ds_new = local_ds_generator() + ds_new = ds_generator() assert len(ds_new) == 4 engine = ds_new.image.chunk_engine @@ -58,12 +63,13 @@ def test_persist_with_local(local_ds_generator): ds.delete() -def test_persist_local_clear_cache(local_ds_generator): - ds = local_ds_generator() +@enabled_persistent_dataset_generators +def test_persist_local_clear_cache(ds_generator): + ds = ds_generator() ds.create_tensor("image") ds.image.extend(np.ones((4, 224, 224, 3))) ds.clear_cache() - ds_new = local_ds_generator() + ds_new = ds_generator() assert len(ds_new) == 4 assert ds_new.image.shape == (4, 224, 224, 3) @@ -72,7 +78,7 @@ def test_persist_local_clear_cache(local_ds_generator): ds.delete() -@all_enabled_datasets +@enabled_datasets def test_populate_dataset(ds): assert ds.meta.tensors == [] ds.create_tensor("image") @@ -127,7 +133,7 @@ def test_stringify_with_path(local_ds): assert str(ds) == f"Dataset(path='{local_ds.path}', tensors=[])" -@all_enabled_datasets +@enabled_datasets def test_compute_fixed_tensor(ds): ds.create_tensor("image") ds.image.extend(np.ones((32, 28, 28))) @@ -135,7 +141,7 @@ def test_compute_fixed_tensor(ds): np.testing.assert_array_equal(ds.image.numpy(), np.ones((32, 28, 28))) -@all_enabled_datasets +@enabled_datasets def test_compute_dynamic_tensor(ds): ds.create_tensor("image") @@ -167,7 +173,7 @@ def test_compute_dynamic_tensor(ds): assert image.is_dynamic -@all_enabled_datasets +@enabled_datasets def test_empty_samples(ds: Dataset): tensor = ds.create_tensor("with_empty") @@ -199,7 +205,7 @@ def test_empty_samples(ds: Dataset): np.testing.assert_array_equal(actual, expected) -@all_enabled_datasets +@enabled_datasets def test_scalar_samples(ds: Dataset): tensor = ds.create_tensor("scalars") @@ -250,7 +256,7 @@ def test_scalar_samples(ds: Dataset): tensor.append([1, 2]) -@all_enabled_datasets +@enabled_datasets def test_sequence_samples(ds: Dataset): tensor = ds.create_tensor("arrays") @@ -266,7 +272,7 @@ def test_sequence_samples(ds: Dataset): assert_array_lists_equal(tensor.numpy(aslist=True), expected) -@all_enabled_datasets +@enabled_datasets def test_iterate_dataset(ds): labels = [1, 9, 7, 4] ds.create_tensor("image") @@ -443,25 +449,6 @@ def test_fails_on_wrong_tensor_syntax(memory_ds): memory_ds.some_tensor = np.ones((28, 28)) -def test_hub_cloud_dataset(hub_cloud_ds_generator): - # TODO: remove this and run all tests on hub cloud - - ds = hub_cloud_ds_generator() - - ds.create_tensor("image") - - for i in range(10): - ds.image.append(i * np.ones((100, 100))) - - del ds - - ds = hub_cloud_ds_generator() - for i in range(10): - np.testing.assert_array_equal(ds.image[i].numpy(), i * np.ones((100, 100))) - - ds.delete() - - def test_empty_dataset(): with CliRunner().isolated_filesystem(): ds = Dataset("test") diff --git a/hub/api/tests/test_api_with_compression.py b/hub/api/tests/test_api_with_compression.py index 501427d48d..e38fc8ef8d 100644 --- a/hub/api/tests/test_api_with_compression.py +++ b/hub/api/tests/test_api_with_compression.py @@ -2,7 +2,7 @@ import pytest from hub.api.tensor import Tensor from hub.tests.common import TENSOR_KEY -from hub.tests.dataset_fixtures import all_enabled_datasets +from hub.tests.dataset_fixtures import enabled_datasets from hub.constants import UNCOMPRESSED import numpy as np @@ -34,7 +34,7 @@ def _populate_compressed_samples(tensor: Tensor, cat_path, flower_path, count=1) return original_compressions -@all_enabled_datasets +@enabled_datasets def test_populate_compressed_samples(ds: Dataset, cat_path, flower_path): images = ds.create_tensor(TENSOR_KEY, htype="image") @@ -53,7 +53,7 @@ def test_populate_compressed_samples(ds: Dataset, cat_path, flower_path): assert images.shape_interval.upper == (5, 900, 900, 4) -@all_enabled_datasets +@enabled_datasets def test_iterate_compressed_samples(ds: Dataset, cat_path, flower_path): images = ds.create_tensor(TENSOR_KEY, htype="image") @@ -81,7 +81,7 @@ def test_iterate_compressed_samples(ds: Dataset, cat_path, flower_path): assert x.shape == expected_shape -@all_enabled_datasets +@enabled_datasets def test_uncompressed(ds: Dataset): images = ds.create_tensor(TENSOR_KEY, sample_compression=UNCOMPRESSED) diff --git a/hub/tests/dataset_fixtures.py b/hub/tests/dataset_fixtures.py index be2d5ea8b2..4c276b31d1 100644 --- a/hub/tests/dataset_fixtures.py +++ b/hub/tests/dataset_fixtures.py @@ -3,9 +3,15 @@ from hub import Dataset -all_enabled_datasets = pytest.mark.parametrize( +enabled_datasets = pytest.mark.parametrize( "ds", - ["memory_ds", "local_ds", "s3_ds"], # TODO: add hub cloud + ["memory_ds", "local_ds", "s3_ds", "hub_cloud_ds"], # TODO: add hub cloud + indirect=True, +) + +enabled_persistent_dataset_generators = pytest.mark.parametrize( + "ds_generator", + ["local_ds_generator", "s3_ds_generator", "hub_cloud_ds_generator"], indirect=True, ) @@ -29,8 +35,16 @@ def generate_local_ds(): @pytest.fixture -def s3_ds(s3_path): - return Dataset(s3_path) +def s3_ds(s3_ds_generator): + return s3_ds_generator() + + +@pytest.fixture +def s3_ds_generator(s3_path): + def generate_s3_ds(): + return Dataset(s3_path) + + return generate_s3_ds @pytest.fixture @@ -50,3 +64,9 @@ def generate_hub_cloud_ds(): def ds(request): """Used with parametrize to use all enabled dataset fixtures.""" return request.getfixturevalue(request.param) + + +@pytest.fixture +def ds_generator(request): + """Used with parametrize to use all enabled persistent dataset generator fixtures.""" + return request.getfixturevalue(request.param) From 69ed6ec58b984623305d7bc78adfe8c0ef6881bf Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Mon, 12 Jul 2021 18:18:56 -0700 Subject: [PATCH 06/33] remove commented out code --- conftest.py | 145 ---------------------------------------------------- 1 file changed, 145 deletions(-) diff --git a/conftest.py b/conftest.py index 52258fe065..9e7112a48b 100644 --- a/conftest.py +++ b/conftest.py @@ -12,7 +12,6 @@ from hub.constants import * from hub.tests.common import SESSION_ID -from hub.util.cache_chain import get_cache_chain # import * so all fixtures can be used accross the project from hub.tests.path_fixtures import * @@ -59,110 +58,6 @@ def pytest_addoption(parser): ) -@pytest.fixture -def marks(request): - """Fixture that gets all `@pytest.mark`s. If a test is marked with - `@pytest.mark.some_mark` the list this fixture returns will contain - `some_mark` as a string. - """ - - marks = [m.name for m in request.node.iter_markers()] - if request.node.parent: - marks += [m.name for m in request.node.parent.iter_markers()] - yield marks - - -def _storage_from_request(request): - requested_providers = request.param.split(",") - - # --cache-chains-only force enables --cache-chains - use_cache_chains_only = _is_opt_true(request, CACHE_ONLY_OPT) - use_cache_chains = ( - _is_opt_true(request, CACHE_OPT) - or use_cache_chains_only - or _is_opt_true(request, FULL_BENCHMARK_OPT) - ) - - if use_cache_chains_only and len(requested_providers) <= 1: - pytest.skip() - - if not use_cache_chains and len(requested_providers) > 1: - pytest.skip() - - storage_providers = [] - cache_sizes = [] - - if MEMORY in requested_providers: - if _is_opt_true(request, MEMORY_OPT): - pytest.skip() - - storage_providers.append(_get_memory_provider(request)) - cache_sizes.append(MIN_FIRST_CACHE_SIZE) - if LOCAL in requested_providers: - if not _is_opt_true(request, LOCAL_OPT): - pytest.skip() - - storage_providers.append(_get_local_provider(request)) - cache_size = MIN_FIRST_CACHE_SIZE if not cache_sizes else MIN_SECOND_CACHE_SIZE - cache_sizes.append(cache_size) - if S3 in requested_providers: - if not _is_opt_true(request, S3_OPT): - pytest.skip() - - storage_providers.append(_get_s3_provider(request)) - - if len(storage_providers) == len(cache_sizes): - cache_sizes.pop() - - return get_cache_chain(storage_providers, cache_sizes) - - -# @pytest.fixture -# def memory_storage(request): -# if not _is_opt_true(request, MEMORY_OPT): -# return _get_memory_provider(request) -# pytest.skip() -# -# -# @pytest.fixture -# def local_storage(request): -# if _is_opt_true(request, LOCAL_OPT): -# return _get_local_provider(request) -# pytest.skip() -# -# -# @pytest.fixture -# def s3_storage(request): -# if _is_opt_true(request, S3_OPT): -# return _get_s3_provider(request) -# pytest.skip() -# -# -# @pytest.fixture -# def storage(request): -# return _storage_from_request(request) -# -# -# @pytest.fixture -# def memory_ds(memory_storage): -# return _get_dataset(memory_storage) -# -# -# @pytest.fixture -# def local_ds(local_storage): -# return _get_dataset(local_storage) -# -# -# @pytest.fixture -# def s3_ds(s3_storage): -# return _get_dataset(s3_storage) -# -# -# @pytest.fixture -# def ds(request): -# return _get_dataset(_storage_from_request(request)) - - def print_session_id(): print("\n\n----------------------------------------------------------") print(f"Testing session ID: {SESSION_ID}") @@ -170,43 +65,3 @@ def print_session_id(): print_session_id() - - -# def _clear_storages(request): -# # clear memory -# if not _is_opt_true(request, MEMORY_OPT): -# storage = _get_storage_provider(request, MEMORY, with_current_test_name=False) -# storage.clear() -# -# # clear local -# if _is_opt_true(request, LOCAL_OPT): -# storage = _get_storage_provider(request, LOCAL, with_current_test_name=False) -# storage.clear() -# -# # clear s3 -# if _is_opt_true(request, S3_OPT): -# storage = _get_storage_provider(request, S3, with_current_test_name=False) -# storage.clear() -# -# -# @pytest.fixture(scope="session", autouse=True) -# def clear_storages_session(request): -# # executed before the first test -# _clear_storages(request) -# -# yield -# -# # executed after the last test -# print_session_id() -# -# -# @pytest.fixture(scope="function", autouse=True) -# def clear_storages_function(request): -# # executed before the current test -# -# yield -# -# # executed after the current test -# if not _is_opt_true(request, KEEP_STORAGE_OPT): -# _clear_storages(request) -# From 3eb50ed4bc09d3fc65296905c5720636f8734bf6 Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Mon, 12 Jul 2021 23:14:31 -0700 Subject: [PATCH 07/33] remove skipif and benchmarks --- hub/cli/test_cli.py | 7 +- hub/client/test_client.py | 7 +- .../tests/test_benchmark_storage_provider.py | 77 ------------------- 3 files changed, 6 insertions(+), 85 deletions(-) delete mode 100644 hub/core/storage/tests/test_benchmark_storage_provider.py diff --git a/hub/cli/test_cli.py b/hub/cli/test_cli.py index f21e4a07ea..791dbe2f4d 100644 --- a/hub/cli/test_cli.py +++ b/hub/cli/test_cli.py @@ -7,12 +7,11 @@ from hub.cli.auth import login, logout -@pytest.mark.skipif(not has_hub_testing_creds(), reason="requires hub credentials") -def test_cli_auth(): +def test_cli_auth(hub_dev_credentials): + username, password = hub_dev_credentials + runner = CliRunner() - username = "testingacc" - password = os.getenv("ACTIVELOOP_HUB_PASSWORD") result = runner.invoke(login, f"-u {username} -p {password}") assert result.exit_code == 0 assert result.output == "Successfully logged in to Activeloop.\n" diff --git a/hub/client/test_client.py b/hub/client/test_client.py index 84db9ac0be..a1030cc23d 100644 --- a/hub/client/test_client.py +++ b/hub/client/test_client.py @@ -10,10 +10,9 @@ ) -@pytest.mark.skipif(not has_hub_testing_creds(), reason="requires hub credentials") -def test_client_requests(): - username = "testingacc" - password = os.getenv("ACTIVELOOP_HUB_PASSWORD") +def test_client_requests(hub_dev_credentials): + username, password = hub_dev_credentials + hub_client = HubBackendClient() hub_client.request_auth_token(username, password) with pytest.raises(Exception): diff --git a/hub/core/storage/tests/test_benchmark_storage_provider.py b/hub/core/storage/tests/test_benchmark_storage_provider.py deleted file mode 100644 index 4716ea9ab6..0000000000 --- a/hub/core/storage/tests/test_benchmark_storage_provider.py +++ /dev/null @@ -1,77 +0,0 @@ -import pytest - -from hub.constants import MB -from hub.tests.common_benchmark import ( - parametrize_benchmark_chunk_sizes, - BENCHMARK_CHUNK_SIZES, -) -from hub.core.tests.common import parametrize_all_caches, parametrize_all_storages -from hub.core.storage.tests.test_storage_provider import KEY # type: ignore - - -SIMULATED_DATA_SIZES = [128 * MB] - -# caclulate the number of chunks needed for each entry in `SIMULATED_DATA_SIZES` -NUM_CHUNKS = [] -for chunk_size in BENCHMARK_CHUNK_SIZES: - for data_size in SIMULATED_DATA_SIZES: - NUM_CHUNKS.append(data_size // chunk_size) - - -parametrize_benchmark_num_chunks = pytest.mark.parametrize("num_chunks", NUM_CHUNKS) - - -def write_to_files(storage, chunk_size, num_chunks): - chunk = b"1" * chunk_size - for i in range(num_chunks): - storage[f"{KEY}_{i}"] = chunk - storage.flush() - - -def read_from_files(storage, num_chunks): - for i in range(num_chunks): - storage[f"{KEY}_{i}"] - - -@pytest.mark.benchmark(group="storage_without_caches_write") -@parametrize_all_storages -@parametrize_benchmark_chunk_sizes -@parametrize_benchmark_num_chunks -def test_storage_write_speeds(benchmark, storage, chunk_size, num_chunks): - benchmark(write_to_files, storage, chunk_size, num_chunks) - - -@pytest.mark.benchmark(group="storage_with_caches_write") -@parametrize_all_caches -@parametrize_benchmark_chunk_sizes -@parametrize_benchmark_num_chunks -def test_cache_write_speeds(benchmark, storage, chunk_size, num_chunks): - benchmark(write_to_files, storage, chunk_size, num_chunks) - - -@pytest.mark.benchmark(group="storage_without_caches_read") -@parametrize_all_storages -@parametrize_benchmark_chunk_sizes -@parametrize_benchmark_num_chunks -def test_storage_read_speeds(benchmark, storage, chunk_size, num_chunks): - write_to_files(storage, chunk_size, num_chunks) - benchmark(read_from_files, storage, num_chunks) - - -@pytest.mark.benchmark(group="storage_with_caches_read") -@parametrize_all_caches -@parametrize_benchmark_chunk_sizes -@parametrize_benchmark_num_chunks -def test_cache_read_speeds(benchmark, storage, chunk_size, num_chunks): - write_to_files(storage, chunk_size, num_chunks) - benchmark(read_from_files, storage, num_chunks) - - -@pytest.mark.benchmark(group="storage_with_caches_read_write") -@parametrize_all_caches -@parametrize_benchmark_chunk_sizes -@parametrize_benchmark_num_chunks -def test_full_cache_read_speeds(benchmark, storage, chunk_size, num_chunks): - write_to_files(storage, chunk_size, num_chunks) - read_from_files(storage, num_chunks) - benchmark(read_from_files, storage, num_chunks) From 03afe2aea4aaa2681fc4a00ff9d55c907413dc44 Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Tue, 13 Jul 2021 00:12:36 -0700 Subject: [PATCH 08/33] add cache chain fixtures --- conftest.py | 16 +----- hub/cli/test_cli.py | 4 +- hub/client/test_client.py | 6 +-- hub/constants.py | 2 - hub/core/storage/tests/test_readonly.py | 12 ++--- .../storage/tests/test_storage_provider.py | 32 +++-------- hub/integrations/tests/test_pytorch.py | 13 +++-- hub/tests/cache_fixtures.py | 53 +++++++++++++++++++ hub/tests/client_fixtures.py | 6 +-- hub/tests/dataset_fixtures.py | 5 +- hub/tests/path_fixtures.py | 6 +-- hub/tests/storage_fixtures.py | 9 ++-- hub/util/check_installation.py | 4 +- hub/util/tests/test_from_tfds.py | 9 ++-- 14 files changed, 97 insertions(+), 80 deletions(-) create mode 100644 hub/tests/cache_fixtures.py diff --git a/conftest.py b/conftest.py index 9e7112a48b..9772f23d5d 100644 --- a/conftest.py +++ b/conftest.py @@ -1,7 +1,5 @@ import os -import posixpath import logging -import pytest # Disable crash reporting before running tests # This MUST come before hub imports to bypass import publication. @@ -17,6 +15,7 @@ from hub.tests.path_fixtures import * from hub.tests.dataset_fixtures import * from hub.tests.storage_fixtures import * +from hub.tests.cache_fixtures import * from hub.tests.client_fixtures import * @@ -31,19 +30,6 @@ def pytest_addoption(parser): parser.addoption( HUB_CLOUD_OPT, action="store_true", help="Hub cloud tests will run if enabled." ) - parser.addoption( - CACHE_OPT, - action="store_true", - help="Tests using the `storage` fixture may run with combinations of all enabled providers in cache chains. " - f"For example, if the option `{S3_OPT}` is not provided, all cache chains that use `S3Provider`" - " are skipped.", - ) - parser.addoption( - CACHE_ONLY_OPT, - action="store_true", - help=f"Force enables `{CACHE_OPT}`. `storage` fixture only returns cache chains. For example, if `{S3_OPT}` is provided, \ - `storage` will never be just `S3Provider`.", - ) parser.addoption( S3_PATH_OPT, type=str, diff --git a/hub/cli/test_cli.py b/hub/cli/test_cli.py index 791dbe2f4d..aec4a727c5 100644 --- a/hub/cli/test_cli.py +++ b/hub/cli/test_cli.py @@ -7,8 +7,8 @@ from hub.cli.auth import login, logout -def test_cli_auth(hub_dev_credentials): - username, password = hub_dev_credentials +def test_cli_auth(hub_cloud_dev_credentials): + username, password = hub_cloud_dev_credentials runner = CliRunner() diff --git a/hub/client/test_client.py b/hub/client/test_client.py index a1030cc23d..6f525a771c 100644 --- a/hub/client/test_client.py +++ b/hub/client/test_client.py @@ -1,5 +1,3 @@ -import os - import pytest from hub.client.client import HubBackendClient @@ -10,8 +8,8 @@ ) -def test_client_requests(hub_dev_credentials): - username, password = hub_dev_credentials +def test_client_requests(hub_cloud_dev_credentials): + username, password = hub_cloud_dev_credentials hub_client = HubBackendClient() hub_client.request_auth_token(username, password) diff --git a/hub/constants.py b/hub/constants.py index 6bf7d87218..d222c9ef98 100644 --- a/hub/constants.py +++ b/hub/constants.py @@ -61,6 +61,4 @@ S3_OPT = "--s3" HUB_CLOUD_OPT = "--hub-cloud" S3_PATH_OPT = "--s3-path" -CACHE_OPT = "--cache-chains" -CACHE_ONLY_OPT = "--cache-chains-only" KEEP_STORAGE_OPT = "--keep-storage" diff --git a/hub/core/storage/tests/test_readonly.py b/hub/core/storage/tests/test_readonly.py index e6d2aee019..b901ede0a0 100644 --- a/hub/core/storage/tests/test_readonly.py +++ b/hub/core/storage/tests/test_readonly.py @@ -1,39 +1,39 @@ import pytest -from hub.core.tests.common import parametrize_all_storages_and_caches from hub.util.exceptions import ReadOnlyModeError from hub import Dataset +from hub.tests.storage_fixtures import all_enabled_storages @pytest.mark.xfail(raises=ReadOnlyModeError, strict=True) -@parametrize_all_storages_and_caches +@all_enabled_storages def test_readonly_del(storage): storage.enable_readonly() del storage["help!"] @pytest.mark.xfail(raises=ReadOnlyModeError, strict=True) -@parametrize_all_storages_and_caches +@all_enabled_storages def test_readonly_set(storage): storage.enable_readonly() storage["help! im stuck in a storage provider!"] = "salvation" @pytest.mark.xfail(raises=ReadOnlyModeError, strict=True) -@parametrize_all_storages_and_caches +@all_enabled_storages def test_readonly_clear(storage): storage.enable_readonly() storage.clear() @pytest.mark.xfail(raises=ReadOnlyModeError, strict=True) -@parametrize_all_storages_and_caches +@all_enabled_storages def test_readonly_flush(storage): storage.enable_readonly() storage.flush() @pytest.mark.xfail(raises=ReadOnlyModeError, strict=True) -@parametrize_all_storages_and_caches +@all_enabled_storages def test_readonly_ds_create_tensor(storage): ds = Dataset(read_only=True, storage=storage) ds.create_tensor("test") diff --git a/hub/core/storage/tests/test_storage_provider.py b/hub/core/storage/tests/test_storage_provider.py index 4bf45f4cd0..828ed80d66 100644 --- a/hub/core/storage/tests/test_storage_provider.py +++ b/hub/core/storage/tests/test_storage_provider.py @@ -1,4 +1,5 @@ -from hub.tests.path_fixtures import ALL_STORAGES, MEMORY, S3, LOCAL +from hub.tests.storage_fixtures import all_enabled_storages +from hub.tests.cache_fixtures import all_enabled_cache_chains import pytest from click.testing import CliRunner @@ -8,23 +9,6 @@ KEY = "file" -STORAGE_FIXTURE_NAME = "storage" - - -_ALL_CACHES_TUPLES = [(MEMORY, LOCAL), (MEMORY, S3), (LOCAL, S3), (MEMORY, LOCAL, S3)] -ALL_CACHES = list(map(lambda i: ",".join(i), _ALL_CACHES_TUPLES)) - -parametrize_all_storages = pytest.mark.parametrize( - STORAGE_FIXTURE_NAME, - ALL_STORAGES, - indirect=True, -) - -parametrize_all_caches = pytest.mark.parametrize( - STORAGE_FIXTURE_NAME, - ALL_CACHES, - indirect=True, -) # helper functions for tests def check_storage_provider(storage): @@ -131,18 +115,18 @@ def check_cache(cache): check_cache_state(cache, expected_state=[set(), set(), 0, 0, 0, 0]) -@parametrize_all_storages +@all_enabled_storages def test_storage_provider(storage): check_storage_provider(storage) -@parametrize_all_caches -def test_cache(storage): - check_storage_provider(storage) - check_cache(storage) +@all_enabled_cache_chains +def test_cache(cache_chain): + check_storage_provider(cache_chain) + check_cache(cache_chain) -@parametrize_all_storages +@all_enabled_storages def test_pickling(storage): with CliRunner().isolated_filesystem(): FILE_1 = f"{KEY}_1" diff --git a/hub/integrations/tests/test_pytorch.py b/hub/integrations/tests/test_pytorch.py index ffcbade3a5..f635c192f0 100644 --- a/hub/integrations/tests/test_pytorch.py +++ b/hub/integrations/tests/test_pytorch.py @@ -1,4 +1,3 @@ -import sys from hub.util.remove_cache import get_base_storage import pytest from hub.util.exceptions import DatasetUnsupportedPytorch @@ -9,7 +8,7 @@ from hub.integrations.pytorch_old import dataset_to_pytorch from hub.util.check_installation import requires_torch -from hub.core.tests.common import parametrize_all_dataset_storages +from hub.tests.dataset_fixtures import enabled_datasets def to_tuple(sample): @@ -17,7 +16,7 @@ def to_tuple(sample): @requires_torch -@parametrize_all_dataset_storages +@enabled_datasets def test_pytorch_small(ds): with ds: ds.create_tensor("image") @@ -78,7 +77,7 @@ def test_pytorch_small(ds): @requires_torch -@parametrize_all_dataset_storages +@enabled_datasets def test_pytorch_transform(ds): with ds: ds.create_tensor("image") @@ -103,7 +102,7 @@ def test_pytorch_transform(ds): @requires_torch -@parametrize_all_dataset_storages +@enabled_datasets def test_pytorch_with_compression(ds: Dataset): # TODO: chunk-wise compression for labels (right now they are uncompressed) with ds: @@ -128,7 +127,7 @@ def test_pytorch_with_compression(ds: Dataset): @requires_torch -@parametrize_all_dataset_storages +@enabled_datasets def test_pytorch_small_old(ds): with ds: ds.create_tensor("image") @@ -158,7 +157,7 @@ def test_pytorch_small_old(ds): @requires_torch -@parametrize_all_dataset_storages +@enabled_datasets @pytest.mark.skip(reason="future") def test_custom_tensor_order(ds): with ds: diff --git a/hub/tests/cache_fixtures.py b/hub/tests/cache_fixtures.py new file mode 100644 index 0000000000..fef55a174c --- /dev/null +++ b/hub/tests/cache_fixtures.py @@ -0,0 +1,53 @@ +from hub.util.cache_chain import get_cache_chain +from hub.constants import MIN_FIRST_CACHE_SIZE, MIN_SECOND_CACHE_SIZE +import pytest + +_MEMORY = "memory_storage" +_LOCAL = "local_storage" +_S3 = "s3_storage" +_HUB_CLOUD = "hub_cloud_storage" + +CACHE_CHAINS = [ + (_MEMORY, _LOCAL), + (_MEMORY, _S3), + (_MEMORY, _HUB_CLOUD), + (_LOCAL, _S3), + (_LOCAL, _HUB_CLOUD), + (_MEMORY, _LOCAL, _S3), + (_MEMORY, _LOCAL, _HUB_CLOUD), +] +CACHE_CHAINS = list(map(lambda i: ",".join(i), CACHE_CHAINS)) + + +all_enabled_cache_chains = pytest.mark.parametrize( + "cache_chain", + CACHE_CHAINS, + indirect=True, +) + + +@pytest.fixture +def cache_chain(request): + requested_storages = request.param.split(",") + + storages = [] + cache_sizes = [] + + # will automatically skip if a storage is not enabled + for requested_storage in requested_storages: + storage = request.getfixturevalue(requested_storage) + storages.append(storage) + + if len(cache_sizes) == 0: + cache_sizes.append(MIN_FIRST_CACHE_SIZE) + elif len(cache_sizes) == 1: + cache_sizes.append(MIN_SECOND_CACHE_SIZE) + + if len(storages) == len(cache_sizes): + cache_sizes.pop() + + assert ( + len(storages) == len(cache_sizes) + 1 + ), f"Invalid test composition. {len(storages)} != {len(cache_sizes)} - 1" + + return get_cache_chain(storages, cache_sizes) diff --git a/hub/tests/client_fixtures.py b/hub/tests/client_fixtures.py index 96608b5bac..68e07d41fb 100644 --- a/hub/tests/client_fixtures.py +++ b/hub/tests/client_fixtures.py @@ -7,7 +7,7 @@ @pytest.fixture(scope="session") -def hub_dev_credentials(request): +def hub_cloud_dev_credentials(request): # TODO: skipif if not is_opt_true(request, HUB_CLOUD_OPT): pytest.skip() @@ -19,8 +19,8 @@ def hub_dev_credentials(request): @pytest.fixture(scope="session") -def hub_testing_token(hub_dev_credentials): - username, password = hub_dev_credentials +def hub_cloud_dev_token(hub_cloud_dev_credentials): + username, password = hub_cloud_dev_credentials client = HubBackendClient() token = client.request_auth_token(username, password) diff --git a/hub/tests/dataset_fixtures.py b/hub/tests/dataset_fixtures.py index 4c276b31d1..d8711326d9 100644 --- a/hub/tests/dataset_fixtures.py +++ b/hub/tests/dataset_fixtures.py @@ -1,4 +1,3 @@ -import os import pytest from hub import Dataset @@ -53,9 +52,9 @@ def hub_cloud_ds(hub_cloud_ds_generator): @pytest.fixture -def hub_cloud_ds_generator(hub_cloud_path, hub_testing_token): +def hub_cloud_ds_generator(hub_cloud_path, hub_cloud_dev_token): def generate_hub_cloud_ds(): - return Dataset(hub_cloud_path, token=hub_testing_token) + return Dataset(hub_cloud_path, token=hub_cloud_dev_token) return generate_hub_cloud_ds diff --git a/hub/tests/path_fixtures.py b/hub/tests/path_fixtures.py index 2f158cfeaa..e78928eaf4 100644 --- a/hub/tests/path_fixtures.py +++ b/hub/tests/path_fixtures.py @@ -27,8 +27,6 @@ S3 = "s3" HUB_CLOUD = "hub_cloud" -ALL_STORAGES = [MEMORY, LOCAL, S3] - def _get_path_composition_configs(request): return { @@ -126,7 +124,7 @@ def s3_path(request): @pytest.fixture -def hub_cloud_path(request, hub_testing_token): +def hub_cloud_path(request, hub_cloud_dev_token): # TODO: can probably generalize these fixtures # TODO: skipif @@ -140,7 +138,7 @@ def hub_cloud_path(request, hub_testing_token): # clear storage unless flagged otherwise if not is_opt_true(request, KEEP_STORAGE_OPT): - storage_provider_from_hub_path(path, token=hub_testing_token).clear() + storage_provider_from_hub_path(path, token=hub_cloud_dev_token).clear() @pytest.fixture diff --git a/hub/tests/storage_fixtures.py b/hub/tests/storage_fixtures.py index 056a2db5ef..723566c8d4 100644 --- a/hub/tests/storage_fixtures.py +++ b/hub/tests/storage_fixtures.py @@ -1,14 +1,12 @@ from hub.core.storage.s3 import S3Provider from hub.core.storage.local import LocalProvider from hub.core.storage.memory import MemoryProvider -from hub.api.dataset import Dataset -from hub.core.storage.provider import StorageProvider import pytest all_enabled_storages = pytest.mark.parametrize( "storage", - ["memory_storage", "local_storage", "s3_storage"], + ["memory_storage", "local_storage", "s3_storage", "hub_cloud_storage"], indirect=True, ) @@ -28,6 +26,11 @@ def s3_storage(s3_path): return S3Provider(s3_path) +@pytest.fixture +def hub_cloud_storage(hub_cloud_path, hub_cloud_token): + return S3Provider(hub_cloud_path, token=hub_cloud_token) + + @pytest.fixture def storage(request): """Used with parametrize to use all enabled storage fixtures.""" diff --git a/hub/util/check_installation.py b/hub/util/check_installation.py index 0ccb8a5c86..d098a4e22f 100644 --- a/hub/util/check_installation.py +++ b/hub/util/check_installation.py @@ -21,7 +21,7 @@ def tensorflow_installed(): return True -def tfds_installed(): +def _tfds_installed(): try: import tensorflow_datasets # type: ignore @@ -40,5 +40,5 @@ def tfds_installed(): ) requires_tfds = pytest.mark.skipif( - not tfds_installed(), reason="requires tensorflow_datasets to be installed" + not _tfds_installed(), reason="requires tensorflow_datasets to be installed" ) diff --git a/hub/util/tests/test_from_tfds.py b/hub/util/tests/test_from_tfds.py index f7e3de539c..5f191b41db 100644 --- a/hub/util/tests/test_from_tfds.py +++ b/hub/util/tests/test_from_tfds.py @@ -1,13 +1,12 @@ import numpy as np -import pytest -from hub.core.tests.common import parametrize_all_dataset_storages -from hub.util.check_installation import tfds_installed, requires_tfds, requires_tensorflow # type: ignore +from hub.util.check_installation import requires_tfds, requires_tensorflow # type: ignore +from hub.tests.dataset_fixtures import enabled_datasets @requires_tfds def test_from_tfds_to_path(local_storage): - from hub.util.from_tfds import from_tfds, from_tfds_to_path # type: ignore + from hub.util.from_tfds import from_tfds_to_path # type: ignore hub_ds = from_tfds_to_path( tfds_dataset_name="mnist", @@ -20,7 +19,7 @@ def test_from_tfds_to_path(local_storage): @requires_tensorflow @requires_tfds -@parametrize_all_dataset_storages +@enabled_datasets def test_from_tfds(ds): import tensorflow_datasets as tfds # type: ignore from hub.util.from_tfds import from_tfds, from_tfds_to_path # type: ignore From f6b311523dffe00e587654d6876221894586fb9a Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Tue, 13 Jul 2021 00:13:31 -0700 Subject: [PATCH 09/33] update circleci to remove --cache-chains param --- .circleci/config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index c552a8e04f..3e765b8336 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -169,7 +169,7 @@ commands: command: | $Env:GOOGLE_APPLICATION_CREDENTIALS = $Env:CI_GCS_PATH setx /m GOOGLE_APPLICATION_CREDENTIALS "$Env:GOOGLE_APPLICATION_CREDENTIALS" - python3 -m pytest --cov-report=xml --cov=./ --local --s3 --cache-chains + python3 -m pytest --cov-report=xml --cov=./ --local --s3 --hub-cloud - when: condition: << parameters.unix-like >> steps: @@ -177,7 +177,7 @@ commands: name: "Running tests - Unix" command: | export GOOGLE_APPLICATION_CREDENTIALS=$HOME/.secrets/gcs.json - python3 -m pytest --cov-report=xml --cov=./ --local --s3 --cache-chains + python3 -m pytest --cov-report=xml --cov=./ --local --s3 --hub-cloud parallelism: 10 codecov-upload: From 09fe56a4ebda89cd03a14e0c15c28ea189858888 Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Tue, 13 Jul 2021 00:15:21 -0700 Subject: [PATCH 10/33] rename local test --- hub/api/tests/test_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hub/api/tests/test_api.py b/hub/api/tests/test_api.py index 69cb64f88a..5ec5a573a3 100644 --- a/hub/api/tests/test_api.py +++ b/hub/api/tests/test_api.py @@ -64,7 +64,7 @@ def test_persist_with(ds_generator): @enabled_persistent_dataset_generators -def test_persist_local_clear_cache(ds_generator): +def test_persist_clear_cache(ds_generator): ds = ds_generator() ds.create_tensor("image") ds.image.extend(np.ones((4, 224, 224, 3))) From 790753564790a99e2fa83fab2011931451cebbe0 Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Tue, 13 Jul 2021 00:33:16 -0700 Subject: [PATCH 11/33] update contributing.md and rename parametrizes --- CONTRIBUTING.md | 139 +++++++----------- hub/api/tests/test_api.py | 1 - hub/core/storage/tests/test_readonly.py | 12 +- .../storage/tests/test_storage_provider.py | 10 +- hub/tests/cache_fixtures.py | 2 +- hub/tests/storage_fixtures.py | 2 +- 6 files changed, 64 insertions(+), 102 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 51c9b2c1ae..ca49b92120 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -24,6 +24,16 @@ pip3 install -r requirements/plugins.txt pip3 install -r requirements/tests.txt ``` +## Generating API Docs + +Hub used pdocs3 to generate docs: https://pdoc3.github.io/pdoc/ +API docs are hosted at: https://api-docs.activeloop.ai/ + +Run the below command to generate API documentation: +``` + pdoc3 --html --output-dir api_docs --template-dir pdoc/templates hub +``` + ### Running Tests - To run memory only tests, use: `python -m pytest .`. @@ -32,111 +42,64 @@ pip3 install -r requirements/tests.txt - To run cache chain only tests, use: `python -m pytest . --local --s3 --cache-chains-only`. Note: you can opt out of `--local` or `--s3`, the cache chains produced will only contain enabled storage providers. - To run ALL tests, use: `python -m pytest . --local --s3 --cache-chains`. -### Prerequisites +### Extra Resources - Understand how to write [pytest](https://docs.pytest.org/en/6.2.x/) tests. - Understand what a [pytest fixture](https://docs.pytest.org/en/6.2.x/fixture.html) is. - Understand what [pytest parametrizations](https://docs.pytest.org/en/6.2.x/parametrize.html) are. - ### Options -To see a list of our custom pytest options, run this command: `pytest -h | sed -En '/custom options:/,/\[pytest\] ini\-options/p'`. +Combine any of the following options to suit your test cases. -### Fixtures -You can find more information on pytest fixtures [here](https://docs.pytest.org/en/6.2.x/fixture.html). - -- `memory_storage`: If `--memory-skip` is provided, tests with this fixture will be skipped. Otherwise, the test will run with only a `MemoryProvider`. -- `local_storage`: If `--local` is **not** provided, tests with this fixture will be skipped. Otherwise, the test will run with only a `LocalProvider`. -- `s3_storage`: If `--s3` is **not** provided, tests with this fixture will be skipped. Otherwise, the test will run with only an `S3Provider`. -- `storage`: All tests that use the `storage` fixture will be parametrized with the enabled `StorageProvider`s (enabled via options defined below). If `--cache-chains` is provided, `storage` may also be a cache chain. Cache chains have the same interface as `StorageProvider`, but instead of just a single provider, it is multiple chained in a sequence, where the last provider in the chain is considered the actual storage. -- `ds`: The same as the `storage` fixture, but the storages that are parametrized are wrapped with a `Dataset`. - -Each `StorageProvider`/`Dataset` that is created for a test via a fixture will automatically have a root created before and destroyed after the test. If you want to keep this data after the test run, you can use the `--keep-storage` option. +- `--local`: Enable local tests. +- `--s3`: Enable S3 tests. +- `--hub-cloud`: Enable hub cloud tests. +- `--memory-skip`: Disable memory tests. +- `--s3-path`: Specify an s3 path if you don't have access to our internal testing bucket. +- `--keep-storage`: By default all storages are cleaned up after tests run. Enable this option if you need to check the storage contents. +#### Options Examples +- `pytest .`: Run all tests with memory only. +- `pytest . --local`: Run all tests with memory and local. +- `pytest . --s3`: Run all tests with memory and s3. +- `pytest . --memory-skip --hub-cloud`: Run all tests with hub cloud only. #### Fixture Examples - -Single storage provider fixture +Datasets ```python -def test_memory(memory_storage): - # test will skip if `--memory-skip` is provided - memory_storage["key"] = b"1234" # this data will only be stored in memory - -def test_local(local_storage): - # test will skip if `--local` is not provided - memory_storage["key"] = b"1234" # this data will only be stored locally - -def test_local(s3_storage): - # test will skip if `--s3` is not provided - # test will fail if credentials are not provided - memory_storage["key"] = b"1234" # this data will only be stored in s3 -``` +@enabled_datasets +def test_dataset(ds: Dataset): + # this test will run once per enabled storage provider. if no providers are explicitly enabled, + # only memory will be used. + pass -Multiple storage providers/cache chains -```python -from hub.core.tests.common import parametrize_all_storages, parametrize_all_caches, parametrize_all_storages_and_caches - -@parametrize_all_storages -def test_storage(storage): - # storage will be parametrized with all enabled `StorageProvider`s - pass - -@parametrize_all_caches -def test_caches(storage): - # storage will be parametrized with all common caches containing enabled `StorageProvider`s - pass - -@parametrize_all_storages_and_caches -def test_storages_and_caches(storage): - # storage will be parametrized with all enabled `StorageProvider`s and common caches containing enabled `StorageProvider`s - pass -``` +def test_local_dataset(local_ds: Dataset): + # this test will run only once with a local dataset. if the `--local` option is not provided, + # this test will be skipped. + pass +``` -Dataset storage providers/cache chains +Storages ```python -from hub.core.tests.common import parametrize_all_dataset_storages, parametrize_all_dataset_storages_and_caches +@enabled_storages +def test_storage(storage: StorageProvider): + # this test will run once per enabled storage provider. if no providers are explicitly enabled, + # only memory will be used. + pass -@parametrize_all_dataset_storages -def test_dataset(ds): - # `ds` will be parametrized with 1 `Dataset` object per enabled `StorageProvider` - pass -@parametrize_all_dataset_storages_and_caches -def test_dataset(ds): - # `ds` will be parametrized with 1 `Dataset` object per enabled `StorageProvider` and all cache chains containing enabled `StorageProvider`s - pass +def test_local_storage(local_storage: StorageProvider): + # this test will run only once with a local storage provider. if the `--local` option is not provided, + # this test will be skipped. + pass ``` -## Benchmarks -We use [pytest-benchmark](https://pytest-benchmark.readthedocs.io/en/latest/usage.html) for our benchmark code which is a plugin for [pytest](https://docs.pytest.org/en/6.2.x/). - -### Running Benchmarks -- To run benchmarks for memory only, use: - - ```python -m pytest . --benchmark-only``` - -- To run ALL **fast** benchmarks, use: - - ```python -m pytest . --local --s3 --cache-chains --benchmark-only```. - - Note: this only runs the subset of benchmarks that finish quickly. - -- To run ALL **fast AND slow** benchmarks, use: - - ```python -m pytest . --local --s3 --full-benchmarks --benchmark-only``` - - Note: this will take a while... (also cache chains are implicitly enabled from `--full-benchmarks`.) - -- You can opt out of `--local` and `--s3` for all commands, or add `--memory-skip`. Also `--cache-chains-only` works. -- Optionally, you can remove the `--benchmark-only` flag in any of these commands to run normal tests alongside the benchmarks. - -## Generating API Docs - -Hub used pdocs3 to generate docs: https://pdoc3.github.io/pdoc/ -API docs are hosted at: https://api-docs.activeloop.ai/ - -Run the below command to generate API documentation: -``` - pdoc3 --html --output-dir api_docs --template-dir pdoc/templates hub +Caches +```python +@enabled_cache_chains +def test_cache(cache: StorageProvider): # note: caches are provided as `StorageProvider`s + # this test runs for every cache chain that contains all enabled storage providers. + # if only memory is enabled (no providers are explicitly enabled), this test will be skipped. + pass ``` \ No newline at end of file diff --git a/hub/api/tests/test_api.py b/hub/api/tests/test_api.py index 5ec5a573a3..a1b3dfe7ca 100644 --- a/hub/api/tests/test_api.py +++ b/hub/api/tests/test_api.py @@ -7,7 +7,6 @@ from hub.api.dataset import Dataset from hub.tests.common import assert_array_lists_equal from hub.util.exceptions import TensorDtypeMismatchError, TensorInvalidSampleShapeError -from hub.client.client import HubBackendClient from click.testing import CliRunner from hub.tests.dataset_fixtures import ( enabled_datasets, diff --git a/hub/core/storage/tests/test_readonly.py b/hub/core/storage/tests/test_readonly.py index b901ede0a0..349f44467e 100644 --- a/hub/core/storage/tests/test_readonly.py +++ b/hub/core/storage/tests/test_readonly.py @@ -1,39 +1,39 @@ import pytest from hub.util.exceptions import ReadOnlyModeError from hub import Dataset -from hub.tests.storage_fixtures import all_enabled_storages +from hub.tests.storage_fixtures import enabled_storages @pytest.mark.xfail(raises=ReadOnlyModeError, strict=True) -@all_enabled_storages +@enabled_storages def test_readonly_del(storage): storage.enable_readonly() del storage["help!"] @pytest.mark.xfail(raises=ReadOnlyModeError, strict=True) -@all_enabled_storages +@enabled_storages def test_readonly_set(storage): storage.enable_readonly() storage["help! im stuck in a storage provider!"] = "salvation" @pytest.mark.xfail(raises=ReadOnlyModeError, strict=True) -@all_enabled_storages +@enabled_storages def test_readonly_clear(storage): storage.enable_readonly() storage.clear() @pytest.mark.xfail(raises=ReadOnlyModeError, strict=True) -@all_enabled_storages +@enabled_storages def test_readonly_flush(storage): storage.enable_readonly() storage.flush() @pytest.mark.xfail(raises=ReadOnlyModeError, strict=True) -@all_enabled_storages +@enabled_storages def test_readonly_ds_create_tensor(storage): ds = Dataset(read_only=True, storage=storage) ds.create_tensor("test") diff --git a/hub/core/storage/tests/test_storage_provider.py b/hub/core/storage/tests/test_storage_provider.py index 828ed80d66..93fc8dd25f 100644 --- a/hub/core/storage/tests/test_storage_provider.py +++ b/hub/core/storage/tests/test_storage_provider.py @@ -1,5 +1,5 @@ -from hub.tests.storage_fixtures import all_enabled_storages -from hub.tests.cache_fixtures import all_enabled_cache_chains +from hub.tests.storage_fixtures import enabled_storages +from hub.tests.cache_fixtures import enabled_cache_chains import pytest from click.testing import CliRunner @@ -115,18 +115,18 @@ def check_cache(cache): check_cache_state(cache, expected_state=[set(), set(), 0, 0, 0, 0]) -@all_enabled_storages +@enabled_storages def test_storage_provider(storage): check_storage_provider(storage) -@all_enabled_cache_chains +@enabled_cache_chains def test_cache(cache_chain): check_storage_provider(cache_chain) check_cache(cache_chain) -@all_enabled_storages +@enabled_storages def test_pickling(storage): with CliRunner().isolated_filesystem(): FILE_1 = f"{KEY}_1" diff --git a/hub/tests/cache_fixtures.py b/hub/tests/cache_fixtures.py index fef55a174c..7c4cf59a54 100644 --- a/hub/tests/cache_fixtures.py +++ b/hub/tests/cache_fixtures.py @@ -19,7 +19,7 @@ CACHE_CHAINS = list(map(lambda i: ",".join(i), CACHE_CHAINS)) -all_enabled_cache_chains = pytest.mark.parametrize( +enabled_cache_chains = pytest.mark.parametrize( "cache_chain", CACHE_CHAINS, indirect=True, diff --git a/hub/tests/storage_fixtures.py b/hub/tests/storage_fixtures.py index 723566c8d4..99ef5b5cbe 100644 --- a/hub/tests/storage_fixtures.py +++ b/hub/tests/storage_fixtures.py @@ -4,7 +4,7 @@ import pytest -all_enabled_storages = pytest.mark.parametrize( +enabled_storages = pytest.mark.parametrize( "storage", ["memory_storage", "local_storage", "s3_storage", "hub_cloud_storage"], indirect=True, From 8157ac87c890dc3461b54f7771893369adfaf651 Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Tue, 13 Jul 2021 00:33:56 -0700 Subject: [PATCH 12/33] fix mypy --- hub/tests/cache_fixtures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hub/tests/cache_fixtures.py b/hub/tests/cache_fixtures.py index 7c4cf59a54..2924548568 100644 --- a/hub/tests/cache_fixtures.py +++ b/hub/tests/cache_fixtures.py @@ -16,7 +16,7 @@ (_MEMORY, _LOCAL, _S3), (_MEMORY, _LOCAL, _HUB_CLOUD), ] -CACHE_CHAINS = list(map(lambda i: ",".join(i), CACHE_CHAINS)) +CACHE_CHAINS = list(map(lambda i: ",".join(i), CACHE_CHAINS)) # type: ignore enabled_cache_chains = pytest.mark.parametrize( From 659b0ab6473809b9e78ce7aedfaaa88e39ebdb95 Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Tue, 13 Jul 2021 00:43:24 -0700 Subject: [PATCH 13/33] remove old test explanation --- CONTRIBUTING.md | 8 -------- 1 file changed, 8 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ca49b92120..ff3ea443d3 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -34,14 +34,6 @@ Run the below command to generate API documentation: pdoc3 --html --output-dir api_docs --template-dir pdoc/templates hub ``` -### Running Tests - -- To run memory only tests, use: `python -m pytest .`. -- To run local only tests, use: `python -m pytest . --memory-skip --local`. -- To run s3 only tests, use: `python -m pytest . --memory-skip --s3`. -- To run cache chain only tests, use: `python -m pytest . --local --s3 --cache-chains-only`. Note: you can opt out of `--local` or `--s3`, the cache chains produced will only contain enabled storage providers. -- To run ALL tests, use: `python -m pytest . --local --s3 --cache-chains`. - ### Extra Resources - Understand how to write [pytest](https://docs.pytest.org/en/6.2.x/) tests. - Understand what a [pytest fixture](https://docs.pytest.org/en/6.2.x/fixture.html) is. From 7f20ce5d632f2c0b0a1c720e378845049d3b0e52 Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Tue, 13 Jul 2021 00:44:01 -0700 Subject: [PATCH 14/33] move api docs back down --- CONTRIBUTING.md | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ff3ea443d3..e5f0278c6d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -24,15 +24,6 @@ pip3 install -r requirements/plugins.txt pip3 install -r requirements/tests.txt ``` -## Generating API Docs - -Hub used pdocs3 to generate docs: https://pdoc3.github.io/pdoc/ -API docs are hosted at: https://api-docs.activeloop.ai/ - -Run the below command to generate API documentation: -``` - pdoc3 --html --output-dir api_docs --template-dir pdoc/templates hub -``` ### Extra Resources - Understand how to write [pytest](https://docs.pytest.org/en/6.2.x/) tests. @@ -94,4 +85,14 @@ def test_cache(cache: StorageProvider): # note: caches are provided as `Storage # this test runs for every cache chain that contains all enabled storage providers. # if only memory is enabled (no providers are explicitly enabled), this test will be skipped. pass +``` + +## Generating API Docs + +Hub used pdocs3 to generate docs: https://pdoc3.github.io/pdoc/ +API docs are hosted at: https://api-docs.activeloop.ai/ + +Run the below command to generate API documentation: +``` + pdoc3 --html --output-dir api_docs --template-dir pdoc/templates hub ``` \ No newline at end of file From 844b1bbe39f794e812cbe00e18fcd5d73c227b42 Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Tue, 13 Jul 2021 00:55:08 -0700 Subject: [PATCH 15/33] add memory showcase & explain where to find fixtures --- CONTRIBUTING.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e5f0278c6d..b2478dd73c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -47,6 +47,7 @@ Combine any of the following options to suit your test cases. - `pytest . --memory-skip --hub-cloud`: Run all tests with hub cloud only. #### Fixture Examples +These are not all of the available fixtures. You can see all of them [here](hub/tests/). Datasets ```python @@ -72,8 +73,8 @@ def test_storage(storage: StorageProvider): pass -def test_local_storage(local_storage: StorageProvider): - # this test will run only once with a local storage provider. if the `--local` option is not provided, +def test_memory_storage(memory_storage: StorageProvider): + # this test will run only once with a memory storage provider. if the `--memory-skip` option is provided, # this test will be skipped. pass ``` From 99a4a24d6320446635ce97a4f44143f80daf8d99 Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Tue, 13 Jul 2021 00:57:34 -0700 Subject: [PATCH 16/33] reformat contributing --- CONTRIBUTING.md | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b2478dd73c..f43395f5b9 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -25,10 +25,12 @@ pip3 install -r requirements/tests.txt ``` -### Extra Resources -- Understand how to write [pytest](https://docs.pytest.org/en/6.2.x/) tests. -- Understand what a [pytest fixture](https://docs.pytest.org/en/6.2.x/fixture.html) is. -- Understand what [pytest parametrizations](https://docs.pytest.org/en/6.2.x/parametrize.html) are. +#### Running Tests +- `pytest .`: Run all tests with memory only. +- `pytest . --local`: Run all tests with memory and local. +- `pytest . --s3`: Run all tests with memory and s3. +- `pytest . --memory-skip --hub-cloud`: Run all tests with hub cloud only. + ### Options Combine any of the following options to suit your test cases. @@ -40,13 +42,16 @@ Combine any of the following options to suit your test cases. - `--s3-path`: Specify an s3 path if you don't have access to our internal testing bucket. - `--keep-storage`: By default all storages are cleaned up after tests run. Enable this option if you need to check the storage contents. -#### Options Examples -- `pytest .`: Run all tests with memory only. -- `pytest . --local`: Run all tests with memory and local. -- `pytest . --s3`: Run all tests with memory and s3. -- `pytest . --memory-skip --hub-cloud`: Run all tests with hub cloud only. -#### Fixture Examples +### Extra Resources +If you feel lost with any of these sections, try reading up on some of these topics. + +- Understand how to write [pytest](https://docs.pytest.org/en/6.2.x/) tests. +- Understand what a [pytest fixture](https://docs.pytest.org/en/6.2.x/fixture.html) is. +- Understand what [pytest parametrizations](https://docs.pytest.org/en/6.2.x/parametrize.html) are. + + +#### Fixture Usage Examples These are not all of the available fixtures. You can see all of them [here](hub/tests/). Datasets From 4fea794873abdc1360beeaa102ac9282d4249498 Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Tue, 13 Jul 2021 00:58:47 -0700 Subject: [PATCH 17/33] update section formatting --- CONTRIBUTING.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f43395f5b9..4ff71ecf93 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -16,7 +16,7 @@ We also use static typing for our function arguments/variables for better code r ## Testing We use [pytest](https://docs.pytest.org/en/6.2.x/) for our tests. In order to make it easier, we also have a set of custom options defined in [conftest.py](conftest.py). -## To install all dependencies run: +### To install all dependencies run: ``` pip3 install -r requirements/common.txt @@ -25,7 +25,7 @@ pip3 install -r requirements/tests.txt ``` -#### Running Tests +### Running Tests - `pytest .`: Run all tests with memory only. - `pytest . --local`: Run all tests with memory and local. - `pytest . --s3`: Run all tests with memory and s3. @@ -51,7 +51,7 @@ If you feel lost with any of these sections, try reading up on some of these top - Understand what [pytest parametrizations](https://docs.pytest.org/en/6.2.x/parametrize.html) are. -#### Fixture Usage Examples +### Fixture Usage Examples These are not all of the available fixtures. You can see all of them [here](hub/tests/). Datasets From 741f7ef215b2b2c6696e2aab7a44a93c9b36ebea Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Tue, 13 Jul 2021 17:41:06 -0700 Subject: [PATCH 18/33] fix mypy --- hub/tests/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hub/tests/common.py b/hub/tests/common.py index 009592fe38..6751363841 100644 --- a/hub/tests/common.py +++ b/hub/tests/common.py @@ -2,7 +2,7 @@ from io import BytesIO import os import pathlib -from typing import List +from typing import List, Optional from uuid import uuid1 import numpy as np @@ -53,7 +53,7 @@ def get_dummy_data_path(subpath: str = ""): return os.path.join(_THIS_FILE, "dummy_data" + os.sep, subpath) -def get_actual_compression_from_buffer(buffer: memoryview) -> str: +def get_actual_compression_from_buffer(buffer: memoryview) -> Optional[str]: """Helpful for checking if actual compression matches expected.""" try: From f8126b16844aab8142de7e65c871d084f0e82566 Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Wed, 14 Jul 2021 01:49:28 -0700 Subject: [PATCH 19/33] use proper hub dev toekn fixture --- hub/tests/storage_fixtures.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hub/tests/storage_fixtures.py b/hub/tests/storage_fixtures.py index 99ef5b5cbe..f15f21f552 100644 --- a/hub/tests/storage_fixtures.py +++ b/hub/tests/storage_fixtures.py @@ -27,8 +27,8 @@ def s3_storage(s3_path): @pytest.fixture -def hub_cloud_storage(hub_cloud_path, hub_cloud_token): - return S3Provider(hub_cloud_path, token=hub_cloud_token) +def hub_cloud_storage(hub_cloud_path, hub_cloud_dev_token): + return S3Provider(hub_cloud_path, token=hub_cloud_dev_token) @pytest.fixture From 315e675a8e9c8dc1ed12e399afa04785f9fcc438 Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Wed, 14 Jul 2021 02:00:10 -0700 Subject: [PATCH 20/33] remove todos and use cache_chain param for contributing doc --- CONTRIBUTING.md | 2 +- hub/tests/client_fixtures.py | 1 - hub/tests/dataset_fixtures.py | 2 +- hub/tests/path_fixtures.py | 3 --- 4 files changed, 2 insertions(+), 6 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4ff71ecf93..2d0b4aaae8 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -87,7 +87,7 @@ def test_memory_storage(memory_storage: StorageProvider): Caches ```python @enabled_cache_chains -def test_cache(cache: StorageProvider): # note: caches are provided as `StorageProvider`s +def test_cache(cache_chain: StorageProvider): # note: caches are provided as `StorageProvider`s # this test runs for every cache chain that contains all enabled storage providers. # if only memory is enabled (no providers are explicitly enabled), this test will be skipped. pass diff --git a/hub/tests/client_fixtures.py b/hub/tests/client_fixtures.py index 68e07d41fb..2d353b3546 100644 --- a/hub/tests/client_fixtures.py +++ b/hub/tests/client_fixtures.py @@ -8,7 +8,6 @@ @pytest.fixture(scope="session") def hub_cloud_dev_credentials(request): - # TODO: skipif if not is_opt_true(request, HUB_CLOUD_OPT): pytest.skip() diff --git a/hub/tests/dataset_fixtures.py b/hub/tests/dataset_fixtures.py index d8711326d9..3e7a7a63fd 100644 --- a/hub/tests/dataset_fixtures.py +++ b/hub/tests/dataset_fixtures.py @@ -4,7 +4,7 @@ enabled_datasets = pytest.mark.parametrize( "ds", - ["memory_ds", "local_ds", "s3_ds", "hub_cloud_ds"], # TODO: add hub cloud + ["memory_ds", "local_ds", "s3_ds", "hub_cloud_ds"], indirect=True, ) diff --git a/hub/tests/path_fixtures.py b/hub/tests/path_fixtures.py index e78928eaf4..08a429be22 100644 --- a/hub/tests/path_fixtures.py +++ b/hub/tests/path_fixtures.py @@ -125,9 +125,6 @@ def s3_path(request): @pytest.fixture def hub_cloud_path(request, hub_cloud_dev_token): - # TODO: can probably generalize these fixtures - - # TODO: skipif if not is_opt_true(request, HUB_CLOUD_OPT): pytest.skip() return From 20ba0ab5ed0995a7c8f20073434437213302f3fe Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Wed, 14 Jul 2021 02:04:00 -0700 Subject: [PATCH 21/33] get storage provider from hub path --- hub/tests/storage_fixtures.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hub/tests/storage_fixtures.py b/hub/tests/storage_fixtures.py index f15f21f552..d685b492b3 100644 --- a/hub/tests/storage_fixtures.py +++ b/hub/tests/storage_fixtures.py @@ -1,3 +1,4 @@ +from hub.util.get_storage_provider import storage_provider_from_hub_path from hub.core.storage.s3 import S3Provider from hub.core.storage.local import LocalProvider from hub.core.storage.memory import MemoryProvider @@ -28,7 +29,7 @@ def s3_storage(s3_path): @pytest.fixture def hub_cloud_storage(hub_cloud_path, hub_cloud_dev_token): - return S3Provider(hub_cloud_path, token=hub_cloud_dev_token) + return storage_provider_from_hub_path(hub_cloud_path, token=hub_cloud_dev_token) @pytest.fixture From 5e90d316807d5cd93ca2a349dda156d811a686a9 Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Wed, 14 Jul 2021 02:15:14 -0700 Subject: [PATCH 22/33] session ID is 4 chars --- hub/tests/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hub/tests/common.py b/hub/tests/common.py index 6751363841..f8168411c7 100644 --- a/hub/tests/common.py +++ b/hub/tests/common.py @@ -3,7 +3,7 @@ import os import pathlib from typing import List, Optional -from uuid import uuid1 +from uuid import uuid4 import numpy as np import posixpath @@ -11,7 +11,7 @@ from hub.constants import KB, MB -SESSION_ID = str(uuid1()) +SESSION_ID = str(uuid4())[:4] # 4 ascii chars should be sufficient _THIS_FILE = pathlib.Path(__file__).parent.absolute() TENSOR_KEY = "tensor" From bad7d3d3b298e06e975c6cb1f2df1a0cd4b9fb4f Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Wed, 14 Jul 2021 02:20:54 -0700 Subject: [PATCH 23/33] removed delete calls --- hub/api/tests/test_api.py | 6 ------ hub/integrations/tests/test_tensorflow.py | 1 - 2 files changed, 7 deletions(-) diff --git a/hub/api/tests/test_api.py b/hub/api/tests/test_api.py index 7c1436eef3..be1ec7bbce 100644 --- a/hub/api/tests/test_api.py +++ b/hub/api/tests/test_api.py @@ -33,8 +33,6 @@ def test_persist(ds_generator): assert ds_new.meta.version == hub.__version__ - ds.delete() - @enabled_persistent_dataset_generators def test_persist_with(ds_generator): @@ -58,8 +56,6 @@ def test_persist_with(ds_generator): assert ds_new.meta.version == hub.__version__ - ds.delete() - @enabled_persistent_dataset_generators def test_persist_clear_cache(ds_generator): @@ -73,7 +69,6 @@ def test_persist_clear_cache(ds_generator): assert ds_new.image.shape == (4, 224, 224, 3) np.testing.assert_array_equal(ds_new.image.numpy(), np.ones((4, 224, 224, 3))) - ds.delete() @enabled_datasets @@ -465,7 +460,6 @@ def test_array_interface(memory_ds: Dataset): def test_hub_dataset_suffix_bug(ds): # creating dataset with similar name but some suffix removed from end ds2 = Dataset(ds.path[:-1]) - ds2.delete() def test_empty_dataset(): diff --git a/hub/integrations/tests/test_tensorflow.py b/hub/integrations/tests/test_tensorflow.py index 90b77e3329..7a81f7c84c 100644 --- a/hub/integrations/tests/test_tensorflow.py +++ b/hub/integrations/tests/test_tensorflow.py @@ -34,4 +34,3 @@ def test_tensorflow_small(local_ds): # converting tf Tensors to numpy np.testing.assert_array_equal(batch["image"].numpy(), i * np.ones((300, 300))) np.testing.assert_array_equal(batch["image2"].numpy(), i * np.ones((100, 100))) - local_ds.delete() From ce276a773462b44a86aa4ce67913321d7ee56ab8 Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Wed, 14 Jul 2021 02:25:28 -0700 Subject: [PATCH 24/33] for hub cloud use storage --- hub/tests/dataset_fixtures.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hub/tests/dataset_fixtures.py b/hub/tests/dataset_fixtures.py index 3e7a7a63fd..2614f6f937 100644 --- a/hub/tests/dataset_fixtures.py +++ b/hub/tests/dataset_fixtures.py @@ -52,9 +52,9 @@ def hub_cloud_ds(hub_cloud_ds_generator): @pytest.fixture -def hub_cloud_ds_generator(hub_cloud_path, hub_cloud_dev_token): +def hub_cloud_ds_generator(hub_cloud_storage): def generate_hub_cloud_ds(): - return Dataset(hub_cloud_path, token=hub_cloud_dev_token) + return Dataset(storage=hub_cloud_storage) return generate_hub_cloud_ds From 7356a46cd60fea66e25df10b9aeeb40eb0c8d005 Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Wed, 14 Jul 2021 02:25:37 -0700 Subject: [PATCH 25/33] remove ds = --- hub/api/tests/test_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hub/api/tests/test_api.py b/hub/api/tests/test_api.py index be1ec7bbce..3acd87cb4e 100644 --- a/hub/api/tests/test_api.py +++ b/hub/api/tests/test_api.py @@ -459,7 +459,7 @@ def test_array_interface(memory_ds: Dataset): @enabled_datasets def test_hub_dataset_suffix_bug(ds): # creating dataset with similar name but some suffix removed from end - ds2 = Dataset(ds.path[:-1]) + Dataset(ds.path[:-1]) def test_empty_dataset(): From 415462c1eb1360da57dd4d5cb6507dbea1075725 Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Wed, 14 Jul 2021 15:58:14 -0700 Subject: [PATCH 26/33] better error messaging --- hub/api/dataset.py | 9 ++++++++- hub/client/client.py | 7 ++++++- hub/constants.py | 2 ++ hub/tests/client_fixtures.py | 14 +++++++++++--- hub/util/exceptions.py | 22 ++++++++++++++++++++++ 5 files changed, 49 insertions(+), 5 deletions(-) diff --git a/hub/api/dataset.py b/hub/api/dataset.py index b6a802c217..0d12b80f3f 100644 --- a/hub/api/dataset.py +++ b/hub/api/dataset.py @@ -15,8 +15,10 @@ from hub.util.bugout_reporter import hub_reporter from hub.util.cache_chain import generate_chain from hub.util.exceptions import ( + CouldNotCreateNewDatasetException, InvalidKeyTypeError, PathNotEmptyException, + ReadOnlyModeError, TensorAlreadyExistsError, TensorDoesNotExistError, ) @@ -218,7 +220,12 @@ def _load_meta(self): else: self.meta = DatasetMeta() - self.storage[meta_key] = self.meta + + try: + self.storage[meta_key] = self.meta + except ReadOnlyModeError: + # if this is thrown, that means the dataset doesn't exist and the user has no write access. + raise CouldNotCreateNewDatasetException(self.path) self.flush() if self.path.startswith("hub://"): diff --git a/hub/client/client.py b/hub/client/client.py index 28899eabe2..4da9f38e9b 100644 --- a/hub/client/client.py +++ b/hub/client/client.py @@ -1,7 +1,7 @@ import sys import requests from typing import Optional -from hub.util.exceptions import LoginException +from hub.util.exceptions import InvalidPasswordException, LoginException from hub.client.utils import check_response_status, write_token, read_token from hub.client.config import ( HUB_REST_ENDPOINT, @@ -96,6 +96,11 @@ def request( timeout=timeout, ) + # clearer error than `ServerUnderMaintenence` + if "password" in json and json["password"] is None: + # do NOT pass in the password here. `None` is explicitly typed. + raise InvalidPasswordException("Password cannot be `None`.") + check_response_status(response) return response diff --git a/hub/constants.py b/hub/constants.py index 87f1acbe75..60f76469cb 100644 --- a/hub/constants.py +++ b/hub/constants.py @@ -56,6 +56,8 @@ PYTEST_S3_PROVIDER_BASE_ROOT = "s3://hub-2.0-tests/" PYTEST_HUB_CLOUD_PROVIDER_BASE_ROOT = f"hub://{HUB_CLOUD_DEV_USERNAME}/" +HUB_DEV_PASSWORD_ENVIRONMENT_VARIABLE = "ACTIVELOOP_HUB_PASSWORD" + # pytest options MEMORY_OPT = "--memory-skip" diff --git a/hub/tests/client_fixtures.py b/hub/tests/client_fixtures.py index 2d353b3546..108b68776f 100644 --- a/hub/tests/client_fixtures.py +++ b/hub/tests/client_fixtures.py @@ -1,9 +1,12 @@ -from hub.constants import HUB_CLOUD_DEV_USERNAME, HUB_CLOUD_OPT +from hub.constants import ( + HUB_CLOUD_DEV_USERNAME, + HUB_CLOUD_OPT, + HUB_DEV_PASSWORD_ENVIRONMENT_VARIABLE, +) from hub.tests.common import is_opt_true import os import pytest from hub.client.client import HubBackendClient -import hub @pytest.fixture(scope="session") @@ -13,7 +16,12 @@ def hub_cloud_dev_credentials(request): # TODO: use dev environment - password = os.getenv("ACTIVELOOP_HUB_PASSWORD") + password = os.getenv(HUB_DEV_PASSWORD_ENVIRONMENT_VARIABLE) + + assert ( + password is not None + ), f"Hub dev password was not found in the environment variable '{HUB_DEV_PASSWORD_ENVIRONMENT_VARIABLE}'. This is necessary for testing hub cloud datasets." + return HUB_CLOUD_DEV_USERNAME, password diff --git a/hub/util/exceptions.py b/hub/util/exceptions.py index bab69f6911..d226f46733 100644 --- a/hub/util/exceptions.py +++ b/hub/util/exceptions.py @@ -140,6 +140,28 @@ def __init__( super().__init__(message) +class InvalidPasswordException(AuthorizationException): + def __init__( + self, + message="The password you provided was invalid.", + ): + super().__init__(message) + + +class CouldNotCreateNewDatasetException(AuthorizationException): + def __init__( + self, + path: str, + ): + + extra = "" + if path.startswith("hub://"): + extra = "Since the path is a `hub://` dataset, if you believe you should have write permissions, try running `activeloop login`." + + message = f"Dataset at '{path}' doesn't exist, and you have no permissions to create one there. Maybe a typo? {extra}" + super().__init__(message) + + class ResourceNotFoundException(Exception): def __init__( self, From 1b4892bfa6b62dd98c3d8c6951dcddb24a0468eb Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Wed, 14 Jul 2021 16:26:29 -0700 Subject: [PATCH 27/33] raise more proper error --- hub/api/dataset.py | 11 ++++++++--- hub/api/tests/test_readonly.py | 25 ++++++++++++++----------- hub/core/storage/tests/test_readonly.py | 9 +-------- 3 files changed, 23 insertions(+), 22 deletions(-) diff --git a/hub/api/dataset.py b/hub/api/dataset.py index 0d12b80f3f..84f37edcbc 100644 --- a/hub/api/dataset.py +++ b/hub/api/dataset.py @@ -75,11 +75,16 @@ def __init__( creds = {} base_storage = get_storage_provider(path, storage, read_only, creds, token) - # done instead of directly assigning read_only as backend might return read_only permissions - if hasattr(base_storage, "read_only") and base_storage.read_only: + if read_only: + # if user passes `read_only`, we should ALWAYS open in read-only mode. self._read_only = True + base_storage.read_only = True else: - self._read_only = False + # done instead of directly assigning read_only as backend might return read_only permissions + if hasattr(base_storage, "read_only") and base_storage.read_only: + self._read_only = True + else: + self._read_only = False # uniquely identifies dataset self.path = path or get_path_from_storage(base_storage) diff --git a/hub/api/tests/test_readonly.py b/hub/api/tests/test_readonly.py index 1e73337436..6bd0c361ff 100644 --- a/hub/api/tests/test_readonly.py +++ b/hub/api/tests/test_readonly.py @@ -3,7 +3,7 @@ import numpy as np from hub import Dataset -from hub.util.exceptions import ReadOnlyModeError +from hub.util.exceptions import CouldNotCreateNewDatasetException, ReadOnlyModeError def _assert_readonly_ops(ds, num_samples: int, sample_shape: Tuple[int]): @@ -18,15 +18,18 @@ def _assert_readonly_ops(ds, num_samples: int, sample_shape: Tuple[int]): assert ds.tensor.shape == (num_samples, *sample_shape) -def test_readonly(local_ds): - path = local_ds.path +def test_readonly(local_ds_generator): + ds = local_ds_generator() + ds.create_tensor("tensor") + ds.tensor.append(np.ones((100, 100))) + ds.read_only = True + _assert_readonly_ops(ds, 1, (100, 100)) - local_ds.create_tensor("tensor") - local_ds.tensor.append(np.ones((100, 100))) - local_ds.read_only = True - _assert_readonly_ops(local_ds, 1, (100, 100)) - del local_ds + ds = local_ds_generator() + ds.read_only = True + _assert_readonly_ops(ds, 1, (100, 100)) - local_ds = Dataset(path) - local_ds.read_only = True - _assert_readonly_ops(local_ds, 1, (100, 100)) + +@pytest.mark.xfail(raises=CouldNotCreateNewDatasetException, strict=True) +def test_readonly_doesnt_exist(local_path): + Dataset(local_path, read_only=True) diff --git a/hub/core/storage/tests/test_readonly.py b/hub/core/storage/tests/test_readonly.py index 349f44467e..c5005f4da8 100644 --- a/hub/core/storage/tests/test_readonly.py +++ b/hub/core/storage/tests/test_readonly.py @@ -1,5 +1,5 @@ import pytest -from hub.util.exceptions import ReadOnlyModeError +from hub.util.exceptions import CouldNotCreateNewDatasetException, ReadOnlyModeError from hub import Dataset from hub.tests.storage_fixtures import enabled_storages @@ -30,10 +30,3 @@ def test_readonly_clear(storage): def test_readonly_flush(storage): storage.enable_readonly() storage.flush() - - -@pytest.mark.xfail(raises=ReadOnlyModeError, strict=True) -@enabled_storages -def test_readonly_ds_create_tensor(storage): - ds = Dataset(read_only=True, storage=storage) - ds.create_tensor("test") From 09f0f4de8b159ee5b31ba65173d7f6871707eeb0 Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Wed, 14 Jul 2021 16:33:12 -0700 Subject: [PATCH 28/33] update client docstring --- hub/client/client.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hub/client/client.py b/hub/client/client.py index 4da9f38e9b..8a3defaad1 100644 --- a/hub/client/client.py +++ b/hub/client/client.py @@ -70,6 +70,9 @@ def request( headers (dict, optional): Dictionary of HTTP Headers to send with the request. timeout (float,optional): How many seconds to wait for the server to send data before giving up. + Raises: + InvalidPasswordException: `password` cannot be `None` inside `json`. + Returns: requests.Response: The response received from the server. """ From 9cc4fdc79323c0f376978717b136f8f1bec5059f Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Wed, 14 Jul 2021 16:56:38 -0700 Subject: [PATCH 29/33] delete dataset after suffix bug test --- hub/api/tests/test_api.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/hub/api/tests/test_api.py b/hub/api/tests/test_api.py index 3acd87cb4e..e7160764ce 100644 --- a/hub/api/tests/test_api.py +++ b/hub/api/tests/test_api.py @@ -456,10 +456,12 @@ def test_array_interface(memory_ds: Dataset): np.testing.assert_array_equal(tensor.numpy(), np.concatenate([arr1, arr2])) -@enabled_datasets -def test_hub_dataset_suffix_bug(ds): +def test_hub_dataset_suffix_bug(hub_cloud_ds): # creating dataset with similar name but some suffix removed from end - Dataset(ds.path[:-1]) + ds = Dataset(hub_cloud_ds.path[:-1]) + + # need to delete because it's a different path (won't be auto cleaned up) + ds.delete() def test_empty_dataset(): From 2815c4e18375c2492af283f2b4fa1fb1edcb9e8f Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Thu, 15 Jul 2021 11:58:37 -0700 Subject: [PATCH 30/33] fix path / token issue --- hub/api/tests/test_api.py | 4 ++-- hub/tests/dataset_fixtures.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hub/api/tests/test_api.py b/hub/api/tests/test_api.py index e7160764ce..274f7cb935 100644 --- a/hub/api/tests/test_api.py +++ b/hub/api/tests/test_api.py @@ -456,9 +456,9 @@ def test_array_interface(memory_ds: Dataset): np.testing.assert_array_equal(tensor.numpy(), np.concatenate([arr1, arr2])) -def test_hub_dataset_suffix_bug(hub_cloud_ds): +def test_hub_dataset_suffix_bug(hub_cloud_ds, hub_cloud_dev_token): # creating dataset with similar name but some suffix removed from end - ds = Dataset(hub_cloud_ds.path[:-1]) + ds = Dataset(hub_cloud_ds.path[:-1], token=hub_cloud_dev_token) # need to delete because it's a different path (won't be auto cleaned up) ds.delete() diff --git a/hub/tests/dataset_fixtures.py b/hub/tests/dataset_fixtures.py index 2614f6f937..3e7a7a63fd 100644 --- a/hub/tests/dataset_fixtures.py +++ b/hub/tests/dataset_fixtures.py @@ -52,9 +52,9 @@ def hub_cloud_ds(hub_cloud_ds_generator): @pytest.fixture -def hub_cloud_ds_generator(hub_cloud_storage): +def hub_cloud_ds_generator(hub_cloud_path, hub_cloud_dev_token): def generate_hub_cloud_ds(): - return Dataset(storage=hub_cloud_storage) + return Dataset(hub_cloud_path, token=hub_cloud_dev_token) return generate_hub_cloud_ds From 67b5ca6f1258b07254d81f859191fb07d4bea54e Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Thu, 15 Jul 2021 12:07:52 -0700 Subject: [PATCH 31/33] remove extra readonly test --- hub/api/dataset.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/hub/api/dataset.py b/hub/api/dataset.py index 84f37edcbc..0d12b80f3f 100644 --- a/hub/api/dataset.py +++ b/hub/api/dataset.py @@ -75,16 +75,11 @@ def __init__( creds = {} base_storage = get_storage_provider(path, storage, read_only, creds, token) - if read_only: - # if user passes `read_only`, we should ALWAYS open in read-only mode. + # done instead of directly assigning read_only as backend might return read_only permissions + if hasattr(base_storage, "read_only") and base_storage.read_only: self._read_only = True - base_storage.read_only = True else: - # done instead of directly assigning read_only as backend might return read_only permissions - if hasattr(base_storage, "read_only") and base_storage.read_only: - self._read_only = True - else: - self._read_only = False + self._read_only = False # uniquely identifies dataset self.path = path or get_path_from_storage(base_storage) From 70dfb759772f4370d2807c0bb510d5ce6c99f1da Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Thu, 15 Jul 2021 13:38:40 -0700 Subject: [PATCH 32/33] mention --keep-storage with memory doesn't work --- CONTRIBUTING.md | 2 +- conftest.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2d0b4aaae8..ab276822a3 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -40,7 +40,7 @@ Combine any of the following options to suit your test cases. - `--hub-cloud`: Enable hub cloud tests. - `--memory-skip`: Disable memory tests. - `--s3-path`: Specify an s3 path if you don't have access to our internal testing bucket. -- `--keep-storage`: By default all storages are cleaned up after tests run. Enable this option if you need to check the storage contents. +- `--keep-storage`: By default all storages are cleaned up after tests run. Enable this option if you need to check the storage contents. Note: `--keep-storage` does not keep memory tests storage. ### Extra Resources diff --git a/conftest.py b/conftest.py index 032e34269b..5f0d8a5efe 100644 --- a/conftest.py +++ b/conftest.py @@ -41,7 +41,7 @@ def pytest_addoption(parser): KEEP_STORAGE_OPT, action="store_true", help="All storage providers/datasets will have their pytest data wiped. \ - Use this option to keep the data after the test run.", + Use this option to keep the data after the test run. Note: does not keep memory tests storage.", ) From 65071f852a47138849e7484ad93442655a80f830 Mon Sep 17 00:00:00 2001 From: McCrearyD Date: Thu, 15 Jul 2021 13:38:52 -0700 Subject: [PATCH 33/33] remove old assertion --- hub/api/tests/test_api.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hub/api/tests/test_api.py b/hub/api/tests/test_api.py index 274f7cb935..e31e58de04 100644 --- a/hub/api/tests/test_api.py +++ b/hub/api/tests/test_api.py @@ -445,13 +445,12 @@ def test_fails_on_wrong_tensor_syntax(memory_ds): def test_array_interface(memory_ds: Dataset): tensor = memory_ds.create_tensor("tensor") - x = np.random.random((32, 32)) + x = np.arange(10).reshape(5, 2) tensor.append(x) arr1 = np.array(tensor) arr2 = np.array(tensor) np.testing.assert_array_equal(x, arr1[0]) np.testing.assert_array_equal(x, arr2[0]) - assert arr1.__array_interface__["data"][0] == arr1.__array_interface__["data"][0] tensor.append(x) np.testing.assert_array_equal(tensor.numpy(), np.concatenate([arr1, arr2]))