diff --git a/.circleci/config.yml b/.circleci/config.yml index c552a8e04f..3e765b8336 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -169,7 +169,7 @@ commands: command: | $Env:GOOGLE_APPLICATION_CREDENTIALS = $Env:CI_GCS_PATH setx /m GOOGLE_APPLICATION_CREDENTIALS "$Env:GOOGLE_APPLICATION_CREDENTIALS" - python3 -m pytest --cov-report=xml --cov=./ --local --s3 --cache-chains + python3 -m pytest --cov-report=xml --cov=./ --local --s3 --hub-cloud - when: condition: << parameters.unix-like >> steps: @@ -177,7 +177,7 @@ commands: name: "Running tests - Unix" command: | export GOOGLE_APPLICATION_CREDENTIALS=$HOME/.secrets/gcs.json - python3 -m pytest --cov-report=xml --cov=./ --local --s3 --cache-chains + python3 -m pytest --cov-report=xml --cov=./ --local --s3 --hub-cloud parallelism: 10 codecov-upload: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 51c9b2c1ae..ab276822a3 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -16,7 +16,7 @@ We also use static typing for our function arguments/variables for better code r ## Testing We use [pytest](https://docs.pytest.org/en/6.2.x/) for our tests. In order to make it easier, we also have a set of custom options defined in [conftest.py](conftest.py). -## To install all dependencies run: +### To install all dependencies run: ``` pip3 install -r requirements/common.txt @@ -24,112 +24,74 @@ pip3 install -r requirements/plugins.txt pip3 install -r requirements/tests.txt ``` -### Running Tests - -- To run memory only tests, use: `python -m pytest .`. -- To run local only tests, use: `python -m pytest . --memory-skip --local`. -- To run s3 only tests, use: `python -m pytest . --memory-skip --s3`. -- To run cache chain only tests, use: `python -m pytest . --local --s3 --cache-chains-only`. Note: you can opt out of `--local` or `--s3`, the cache chains produced will only contain enabled storage providers. -- To run ALL tests, use: `python -m pytest . --local --s3 --cache-chains`. -### Prerequisites -- Understand how to write [pytest](https://docs.pytest.org/en/6.2.x/) tests. -- Understand what a [pytest fixture](https://docs.pytest.org/en/6.2.x/fixture.html) is. -- Understand what [pytest parametrizations](https://docs.pytest.org/en/6.2.x/parametrize.html) are. +### Running Tests +- `pytest .`: Run all tests with memory only. +- `pytest . --local`: Run all tests with memory and local. +- `pytest . --s3`: Run all tests with memory and s3. +- `pytest . --memory-skip --hub-cloud`: Run all tests with hub cloud only. ### Options -To see a list of our custom pytest options, run this command: `pytest -h | sed -En '/custom options:/,/\[pytest\] ini\-options/p'`. +Combine any of the following options to suit your test cases. -### Fixtures -You can find more information on pytest fixtures [here](https://docs.pytest.org/en/6.2.x/fixture.html). +- `--local`: Enable local tests. +- `--s3`: Enable S3 tests. +- `--hub-cloud`: Enable hub cloud tests. +- `--memory-skip`: Disable memory tests. +- `--s3-path`: Specify an s3 path if you don't have access to our internal testing bucket. +- `--keep-storage`: By default all storages are cleaned up after tests run. Enable this option if you need to check the storage contents. Note: `--keep-storage` does not keep memory tests storage. -- `memory_storage`: If `--memory-skip` is provided, tests with this fixture will be skipped. Otherwise, the test will run with only a `MemoryProvider`. -- `local_storage`: If `--local` is **not** provided, tests with this fixture will be skipped. 
Otherwise, the test will run with only a `LocalProvider`. -- `s3_storage`: If `--s3` is **not** provided, tests with this fixture will be skipped. Otherwise, the test will run with only an `S3Provider`. -- `storage`: All tests that use the `storage` fixture will be parametrized with the enabled `StorageProvider`s (enabled via options defined below). If `--cache-chains` is provided, `storage` may also be a cache chain. Cache chains have the same interface as `StorageProvider`, but instead of just a single provider, it is multiple chained in a sequence, where the last provider in the chain is considered the actual storage. -- `ds`: The same as the `storage` fixture, but the storages that are parametrized are wrapped with a `Dataset`. -Each `StorageProvider`/`Dataset` that is created for a test via a fixture will automatically have a root created before and destroyed after the test. If you want to keep this data after the test run, you can use the `--keep-storage` option. +### Extra Resources +If you feel lost with any of these sections, try reading up on some of these topics. +- Understand how to write [pytest](https://docs.pytest.org/en/6.2.x/) tests. +- Understand what a [pytest fixture](https://docs.pytest.org/en/6.2.x/fixture.html) is. +- Understand what [pytest parametrizations](https://docs.pytest.org/en/6.2.x/parametrize.html) are. -#### Fixture Examples +### Fixture Usage Examples +These are not all of the available fixtures. You can see all of them [here](hub/tests/). -Single storage provider fixture +Datasets ```python -def test_memory(memory_storage): - # test will skip if `--memory-skip` is provided - memory_storage["key"] = b"1234" # this data will only be stored in memory - -def test_local(local_storage): - # test will skip if `--local` is not provided - memory_storage["key"] = b"1234" # this data will only be stored locally - -def test_local(s3_storage): - # test will skip if `--s3` is not provided - # test will fail if credentials are not provided - memory_storage["key"] = b"1234" # this data will only be stored in s3 -``` +@enabled_datasets +def test_dataset(ds: Dataset): + # this test will run once per enabled storage provider. if no providers are explicitly enabled, + # only memory will be used. + pass -Multiple storage providers/cache chains -```python -from hub.core.tests.common import parametrize_all_storages, parametrize_all_caches, parametrize_all_storages_and_caches - -@parametrize_all_storages -def test_storage(storage): - # storage will be parametrized with all enabled `StorageProvider`s - pass - -@parametrize_all_caches -def test_caches(storage): - # storage will be parametrized with all common caches containing enabled `StorageProvider`s - pass - -@parametrize_all_storages_and_caches -def test_storages_and_caches(storage): - # storage will be parametrized with all enabled `StorageProvider`s and common caches containing enabled `StorageProvider`s - pass -``` +def test_local_dataset(local_ds: Dataset): + # this test will run only once with a local dataset. if the `--local` option is not provided, + # this test will be skipped. + pass +``` -Dataset storage providers/cache chains +Storages ```python -from hub.core.tests.common import parametrize_all_dataset_storages, parametrize_all_dataset_storages_and_caches +@enabled_storages +def test_storage(storage: StorageProvider): + # this test will run once per enabled storage provider. if no providers are explicitly enabled, + # only memory will be used. 
+ pass -@parametrize_all_dataset_storages -def test_dataset(ds): - # `ds` will be parametrized with 1 `Dataset` object per enabled `StorageProvider` - pass -@parametrize_all_dataset_storages_and_caches -def test_dataset(ds): - # `ds` will be parametrized with 1 `Dataset` object per enabled `StorageProvider` and all cache chains containing enabled `StorageProvider`s - pass +def test_memory_storage(memory_storage: StorageProvider): + # this test will run only once with a memory storage provider. if the `--memory-skip` option is provided, + # this test will be skipped. + pass ``` -## Benchmarks -We use [pytest-benchmark](https://pytest-benchmark.readthedocs.io/en/latest/usage.html) for our benchmark code which is a plugin for [pytest](https://docs.pytest.org/en/6.2.x/). - -### Running Benchmarks -- To run benchmarks for memory only, use: - - ```python -m pytest . --benchmark-only``` - -- To run ALL **fast** benchmarks, use: - - ```python -m pytest . --local --s3 --cache-chains --benchmark-only```. - - Note: this only runs the subset of benchmarks that finish quickly. - -- To run ALL **fast AND slow** benchmarks, use: - - ```python -m pytest . --local --s3 --full-benchmarks --benchmark-only``` - - Note: this will take a while... (also cache chains are implicitly enabled from `--full-benchmarks`.) - -- You can opt out of `--local` and `--s3` for all commands, or add `--memory-skip`. Also `--cache-chains-only` works. -- Optionally, you can remove the `--benchmark-only` flag in any of these commands to run normal tests alongside the benchmarks. +Caches +```python +@enabled_cache_chains +def test_cache(cache_chain: StorageProvider): # note: caches are provided as `StorageProvider`s + # this test runs for every cache chain that contains all enabled storage providers. + # if only memory is enabled (no providers are explicitly enabled), this test will be skipped. + pass +``` ## Generating API Docs diff --git a/conftest.py b/conftest.py index 2ccb3091e1..5f0d8a5efe 100644 --- a/conftest.py +++ b/conftest.py @@ -1,8 +1,6 @@ from hub.core.storage.provider import StorageProvider import os -import posixpath import logging -import pytest # Disable crash reporting before running tests # This MUST come before hub imports to bypass import publication. 
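For orientation before the slimmed-down `conftest.py` hunks that follow: option names now live in `hub.constants`, and every fixture module introduced by this patch gates itself on one of those options, skipping otherwise. A minimal sketch of that shared pattern, assuming only names added in this patch (`is_opt_true`, `LOCAL_OPT`); the fixture name itself is hypothetical:

```python
import pytest

from hub.constants import LOCAL_OPT
from hub.tests.common import is_opt_true


@pytest.fixture
def local_only_resource(request):
    # skip any test using this fixture unless `--local` was passed to pytest
    if not is_opt_true(request, LOCAL_OPT):
        pytest.skip()

    return object()  # placeholder for the resource the fixture would provide
```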
@@ -11,90 +9,27 @@
 os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
 logging.basicConfig(level=logging.ERROR)
 
-from hub.api.dataset import Dataset
-from hub.constants import (
-    MIN_FIRST_CACHE_SIZE,
-    MIN_SECOND_CACHE_SIZE,
-    PYTEST_LOCAL_PROVIDER_BASE_ROOT,
-    PYTEST_MEMORY_PROVIDER_BASE_ROOT,
-    PYTEST_S3_PROVIDER_BASE_ROOT,
-)
-from hub.core.storage import LocalProvider, MemoryProvider, S3Provider
-from hub.core.tests.common import LOCAL, MEMORY, S3
-from hub.tests.common import SESSION_ID, current_test_name, get_dummy_data_path
-from hub.util.cache_chain import get_cache_chain
+from hub.constants import *
+from hub.tests.common import SESSION_ID
-
-MEMORY_OPT = "--memory-skip"
-LOCAL_OPT = "--local"
-S3_OPT = "--s3"
-CACHE_OPT = "--cache-chains"
-CACHE_ONLY_OPT = "--cache-chains-only"
-S3_PATH_OPT = "--s3-path"
-KEEP_STORAGE_OPT = "--keep-storage"
-FULL_BENCHMARK_OPT = "--full-benchmarks"
-
-# @pytest.mark.`FULL_BENCHMARK_MARK` is how full benchmarks are notated
-FULL_BENCHMARK_MARK = "full_benchmark"
-
-
-def _get_storage_configs(request):
-    return {
-        MEMORY: {
-            "base_root": PYTEST_MEMORY_PROVIDER_BASE_ROOT,
-            "class": MemoryProvider,
-            "use_id": False,
-            "is_id_prefix": False,
-            # if is_id_prefix (and use_id=True), the session id comes before test name, otherwise it is reversed
-        },
-        LOCAL: {
-            "base_root": PYTEST_LOCAL_PROVIDER_BASE_ROOT,
-            "class": LocalProvider,
-            "use_id": False,
-            "is_id_prefix": False,
-        },
-        S3: {
-            "base_root": request.config.getoption(S3_PATH_OPT),
-            "class": S3Provider,
-            "use_id": True,
-            "is_id_prefix": True,
-        },
-    }
-
-
-def _is_opt_true(request, opt):
-    return request.config.getoption(opt)
+# import * so all fixtures can be used across the project
+from hub.tests.path_fixtures import *
+from hub.tests.dataset_fixtures import *
+from hub.tests.storage_fixtures import *
+from hub.tests.cache_fixtures import *
+from hub.tests.client_fixtures import *
 
 
 def pytest_addoption(parser):
     parser.addoption(
-        MEMORY_OPT,
-        action="store_true",
-        help="Tests using the `memory_storage` fixture will be skipped. Tests using the `storage` fixture will be "
-        "skipped if called with `MemoryProvider`.",
+        MEMORY_OPT, action="store_true", help="Memory tests will be SKIPPED if enabled."
     )
     parser.addoption(
-        LOCAL_OPT,
-        action="store_true",
-        help="Tests using the `storage`/`local_storage` fixtures will run with `LocalProvider`.",
+        LOCAL_OPT, action="store_true", help="Local tests will run if enabled."
     )
+    parser.addoption(S3_OPT, action="store_true", help="S3 tests will run if enabled.")
     parser.addoption(
-        S3_OPT,
-        action="store_true",
-        help="Tests using the `storage`/`s3_storage` fixtures will run with `S3Provider`.",
-    )
-    parser.addoption(
-        CACHE_OPT,
-        action="store_true",
-        help="Tests using the `storage` fixture may run with combinations of all enabled providers in cache chains. "
-        f"For example, if the option `{S3_OPT}` is not provided, all cache chains that use `S3Provider`"
-        " are skipped.",
-    )
-    parser.addoption(
-        CACHE_ONLY_OPT,
-        action="store_true",
-        help=f"Force enables `{CACHE_OPT}`. `storage` fixture only returns cache chains. For example, if `{S3_OPT}` is provided, \
-        `storage` will never be just `S3Provider`.",
+        HUB_CLOUD_OPT, action="store_true", help="Hub cloud tests will run if enabled."
     )
     parser.addoption(
         S3_PATH_OPT,
@@ -102,173 +37,13 @@ def pytest_addoption(parser):
         help="Url to s3 bucket with optional key. 
Example: s3://bucket_name/key/to/tests/", default=PYTEST_S3_PROVIDER_BASE_ROOT, ) - parser.addoption( - FULL_BENCHMARK_OPT, - action="store_true", - help="Some benchmarks take a long time to run and by default should be skipped. This option enables them. " - "It also force enables `--cache-chains`.", - ) parser.addoption( KEEP_STORAGE_OPT, action="store_true", help="All storage providers/datasets will have their pytest data wiped. \ - Use this option to keep the data after the test run.", - ) - - -def _get_storage_provider( - request, storage_name, with_current_test_name=True, info_override={} -): - info = _get_storage_configs(request)[storage_name] - info.update(info_override) - - root = info["base_root"] - - path = "" - if with_current_test_name: - path = current_test_name() - - if info["use_id"]: - if info["is_id_prefix"]: - path = posixpath.join(SESSION_ID, path) - else: - path = posixpath.join(path, SESSION_ID) - - root = posixpath.join(root, path) - return info["class"](root) - - -def _get_memory_provider(request): - return _get_storage_provider(request, MEMORY) - - -def _get_local_provider(request): - return _get_storage_provider(request, LOCAL) - - -def _get_s3_provider(request): - return _get_storage_provider(request, S3) - - -def _get_dataset(storage: StorageProvider): - return Dataset(storage=storage) - - -@pytest.fixture -def marks(request): - """Fixture that gets all `@pytest.mark`s. If a test is marked with - `@pytest.mark.some_mark` the list this fixture returns will contain - `some_mark` as a string. - """ - - marks = [m.name for m in request.node.iter_markers()] - if request.node.parent: - marks += [m.name for m in request.node.parent.iter_markers()] - yield marks - - -def _storage_from_request(request): - requested_providers = request.param.split(",") - - # --cache-chains-only force enables --cache-chains - use_cache_chains_only = _is_opt_true(request, CACHE_ONLY_OPT) - use_cache_chains = ( - _is_opt_true(request, CACHE_OPT) - or use_cache_chains_only - or _is_opt_true(request, FULL_BENCHMARK_OPT) + Use this option to keep the data after the test run. 
Note: does not keep memory tests storage.", ) - if use_cache_chains_only and len(requested_providers) <= 1: - pytest.skip() - - if not use_cache_chains and len(requested_providers) > 1: - pytest.skip() - - storage_providers = [] - cache_sizes = [] - - if MEMORY in requested_providers: - if _is_opt_true(request, MEMORY_OPT): - pytest.skip() - - storage_providers.append(_get_memory_provider(request)) - cache_sizes.append(MIN_FIRST_CACHE_SIZE) - if LOCAL in requested_providers: - if not _is_opt_true(request, LOCAL_OPT): - pytest.skip() - - storage_providers.append(_get_local_provider(request)) - cache_size = MIN_FIRST_CACHE_SIZE if not cache_sizes else MIN_SECOND_CACHE_SIZE - cache_sizes.append(cache_size) - if S3 in requested_providers: - if not _is_opt_true(request, S3_OPT): - pytest.skip() - - storage_providers.append(_get_s3_provider(request)) - - if len(storage_providers) == len(cache_sizes): - cache_sizes.pop() - - return get_cache_chain(storage_providers, cache_sizes) - - -@pytest.fixture -def memory_storage(request): - if not _is_opt_true(request, MEMORY_OPT): - return _get_memory_provider(request) - pytest.skip() - - -@pytest.fixture -def local_storage(request): - if _is_opt_true(request, LOCAL_OPT): - return _get_local_provider(request) - pytest.skip() - - -@pytest.fixture -def s3_storage(request): - if _is_opt_true(request, S3_OPT): - return _get_s3_provider(request) - pytest.skip() - - -@pytest.fixture -def storage(request): - return _storage_from_request(request) - - -@pytest.fixture -def memory_ds(memory_storage): - return _get_dataset(memory_storage) - - -@pytest.fixture -def local_ds(local_storage): - return _get_dataset(local_storage) - - -@pytest.fixture -def s3_ds(s3_storage): - return _get_dataset(s3_storage) - - -@pytest.fixture -def ds(request): - return _get_dataset(_storage_from_request(request)) - - -@pytest.fixture -def cat_path(): - path = get_dummy_data_path("compressed_images") - return os.path.join(path, "cat.jpeg") - - -@pytest.fixture -def flower_path(): - path = get_dummy_data_path("compressed_images") - return os.path.join(path, "flower.png") - def print_session_id(): print("\n\n----------------------------------------------------------") @@ -277,52 +52,3 @@ def print_session_id(): print_session_id() - - -def _clear_storages(request): - # clear memory - if not _is_opt_true(request, MEMORY_OPT): - storage = _get_storage_provider(request, MEMORY, with_current_test_name=False) - storage.clear() - - # clear local - if _is_opt_true(request, LOCAL_OPT): - storage = _get_storage_provider(request, LOCAL, with_current_test_name=False) - storage.clear() - - # clear s3 - if _is_opt_true(request, S3_OPT): - storage = _get_storage_provider(request, S3, with_current_test_name=False) - storage.clear() - - -@pytest.fixture(scope="session", autouse=True) -def clear_storages_session(request): - # executed before the first test - _clear_storages(request) - - yield - - # executed after the last test - print_session_id() - - -@pytest.fixture(scope="function", autouse=True) -def clear_storages_function(request): - # executed before the current test - - yield - - # executed after the current test - if not _is_opt_true(request, KEEP_STORAGE_OPT): - _clear_storages(request) - - -@pytest.fixture(scope="function", autouse=True) -def skip_if_full_benchmarks_disabled(request, marks): - if _is_opt_true(request, FULL_BENCHMARK_OPT): - # don't skip anything if `FULL_BENCHMARK_OPT` is provided - return - - if FULL_BENCHMARK_MARK in marks: - pytest.skip() diff --git a/hub/api/dataset.py 
b/hub/api/dataset.py index b6a802c217..0d12b80f3f 100644 --- a/hub/api/dataset.py +++ b/hub/api/dataset.py @@ -15,8 +15,10 @@ from hub.util.bugout_reporter import hub_reporter from hub.util.cache_chain import generate_chain from hub.util.exceptions import ( + CouldNotCreateNewDatasetException, InvalidKeyTypeError, PathNotEmptyException, + ReadOnlyModeError, TensorAlreadyExistsError, TensorDoesNotExistError, ) @@ -218,7 +220,12 @@ def _load_meta(self): else: self.meta = DatasetMeta() - self.storage[meta_key] = self.meta + + try: + self.storage[meta_key] = self.meta + except ReadOnlyModeError: + # if this is thrown, that means the dataset doesn't exist and the user has no write access. + raise CouldNotCreateNewDatasetException(self.path) self.flush() if self.path.startswith("hub://"): diff --git a/hub/api/tests/test_api.py b/hub/api/tests/test_api.py index 5bb9b75751..e31e58de04 100644 --- a/hub/api/tests/test_api.py +++ b/hub/api/tests/test_api.py @@ -4,15 +4,13 @@ import hub import os from hub.api.dataset import Dataset -from hub.core.tests.common import parametrize_all_dataset_storages from hub.tests.common import assert_array_lists_equal -from hub.util.exceptions import ( - TensorDtypeMismatchError, - TensorInvalidSampleShapeError, -) -from hub.client.client import HubBackendClient -from hub.client.utils import has_hub_testing_creds +from hub.util.exceptions import TensorDtypeMismatchError, TensorInvalidSampleShapeError from click.testing import CliRunner +from hub.tests.dataset_fixtures import ( + enabled_datasets, + enabled_persistent_dataset_generators, +) # need this for 32-bit and 64-bit systems to have correct tests @@ -20,12 +18,14 @@ MAX_FLOAT_DTYPE = np.float_.__name__ -def test_persist_local(local_storage): - ds = Dataset(local_storage.root, local_cache_size=512) +@enabled_persistent_dataset_generators +def test_persist(ds_generator): + ds = ds_generator() + ds.create_tensor("image") ds.image.extend(np.ones((4, 224, 224, 3))) - ds_new = Dataset(local_storage.root) + ds_new = ds_generator() assert len(ds_new) == 4 assert ds_new.image.shape == (4, 224, 224, 3) @@ -33,18 +33,17 @@ def test_persist_local(local_storage): assert ds_new.meta.version == hub.__version__ - ds.delete() - -def test_persist_with_local(local_storage): - with Dataset(local_storage.root, local_cache_size=512) as ds: +@enabled_persistent_dataset_generators +def test_persist_with(ds_generator): + with ds_generator() as ds: ds.create_tensor("image") ds.image.extend(np.ones((4, 224, 224, 3))) - ds_new = Dataset(local_storage.root) + ds_new = ds_generator() assert len(ds_new) == 0 # shouldn't be flushed yet - ds_new = Dataset(local_storage.root) + ds_new = ds_generator() assert len(ds_new) == 4 engine = ds_new.image.chunk_engine @@ -57,24 +56,22 @@ def test_persist_with_local(local_storage): assert ds_new.meta.version == hub.__version__ - ds.delete() - -def test_persist_local_clear_cache(local_storage): - ds = Dataset(local_storage.root, local_cache_size=512) +@enabled_persistent_dataset_generators +def test_persist_clear_cache(ds_generator): + ds = ds_generator() ds.create_tensor("image") ds.image.extend(np.ones((4, 224, 224, 3))) ds.clear_cache() - ds_new = Dataset(local_storage.root) + ds_new = ds_generator() assert len(ds_new) == 4 assert ds_new.image.shape == (4, 224, 224, 3) np.testing.assert_array_equal(ds_new.image.numpy(), np.ones((4, 224, 224, 3))) - ds.delete() -@parametrize_all_dataset_storages +@enabled_datasets def test_populate_dataset(ds): assert ds.meta.tensors == [] ds.create_tensor("image") @@ 
-99,13 +96,12 @@ def test_populate_dataset(ds): @pytest.mark.xfail(raises=NotImplementedError, strict=True) -def test_larger_data_memory(memory_storage): - ds = Dataset(memory_storage.root) - ds.create_tensor("image") - ds.image.extend(np.ones((4, 4096, 4096))) - assert len(ds) == 4 - assert ds.image.shape == (4, 4096, 4096) - np.testing.assert_array_equal(ds.image.numpy(), np.ones((4, 4096, 4096))) +def test_larger_data_memory(memory_ds): + memory_ds.create_tensor("image") + memory_ds.image.extend(np.ones((4, 4096, 4096))) + assert len(memory_ds) == 4 + assert memory_ds.image.shape == (4, 4096, 4096) + np.testing.assert_array_equal(memory_ds.image.numpy(), np.ones((4, 4096, 4096))) def test_stringify(memory_ds): @@ -130,7 +126,7 @@ def test_stringify_with_path(local_ds): assert str(ds) == f"Dataset(path='{local_ds.path}', tensors=[])" -@parametrize_all_dataset_storages +@enabled_datasets def test_compute_fixed_tensor(ds): ds.create_tensor("image") ds.image.extend(np.ones((32, 28, 28))) @@ -138,7 +134,7 @@ def test_compute_fixed_tensor(ds): np.testing.assert_array_equal(ds.image.numpy(), np.ones((32, 28, 28))) -@parametrize_all_dataset_storages +@enabled_datasets def test_compute_dynamic_tensor(ds): ds.create_tensor("image") @@ -170,7 +166,7 @@ def test_compute_dynamic_tensor(ds): assert image.is_dynamic -@parametrize_all_dataset_storages +@enabled_datasets def test_empty_samples(ds: Dataset): tensor = ds.create_tensor("with_empty") @@ -201,7 +197,7 @@ def test_empty_samples(ds: Dataset): np.testing.assert_array_equal(actual, expected) -@parametrize_all_dataset_storages +@enabled_datasets def test_scalar_samples(ds: Dataset): tensor = ds.create_tensor("scalars") @@ -249,7 +245,7 @@ def test_scalar_samples(ds: Dataset): tensor.append([1, 2]) -@parametrize_all_dataset_storages +@enabled_datasets def test_sequence_samples(ds: Dataset): tensor = ds.create_tensor("arrays") @@ -266,7 +262,7 @@ def test_sequence_samples(ds: Dataset): assert_array_lists_equal(tensor.numpy(aslist=True), expected) -@parametrize_all_dataset_storages +@enabled_datasets def test_iterate_dataset(ds): labels = [1, 9, 7, 4] ds.create_tensor("image") @@ -447,51 +443,24 @@ def test_fails_on_wrong_tensor_syntax(memory_ds): memory_ds.some_tensor = np.ones((28, 28)) -@pytest.mark.skipif(not has_hub_testing_creds(), reason="requires hub credentials") -def test_hub_cloud_dataset(): - username = "testingacc" - password = os.getenv("ACTIVELOOP_HUB_PASSWORD") - id = str(uuid.uuid1()) - - uri = f"hub://{username}/hub2ds2_{id}" - - client = HubBackendClient() - token = client.request_auth_token(username, password) - - with Dataset(uri, token=token) as ds: - ds.create_tensor("image") - ds.create_tensor("label", htype="class_label") - - for i in range(1000): - ds.image.append(i * np.ones((100, 100))) - ds.label.append(np.uint32(i)) - - ds = Dataset(uri, token=token) - for i in range(1000): - np.testing.assert_array_equal(ds.image[i].numpy(), i * np.ones((100, 100))) - np.testing.assert_array_equal(ds.label[i].numpy(), np.uint32(i)) - - ds.delete() - - def test_array_interface(memory_ds: Dataset): tensor = memory_ds.create_tensor("tensor") - x = np.random.random((32, 32)) + x = np.arange(10).reshape(5, 2) tensor.append(x) arr1 = np.array(tensor) arr2 = np.array(tensor) np.testing.assert_array_equal(x, arr1[0]) np.testing.assert_array_equal(x, arr2[0]) - assert arr1.__array_interface__["data"][0] == arr1.__array_interface__["data"][0] tensor.append(x) np.testing.assert_array_equal(tensor.numpy(), np.concatenate([arr1, arr2])) 
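Context for the `ds_generator` rewrites above: each generator fixture returns a factory that reopens the same dataset path, so persistence is asserted by writing through one handle and reading through a fresh one. A sketch of the pattern, reusing the fixture and `Dataset` calls from this patch (the test name is hypothetical):

```python
import numpy as np

from hub.tests.dataset_fixtures import enabled_persistent_dataset_generators


@enabled_persistent_dataset_generators
def test_persistence_sketch(ds_generator):
    ds = ds_generator()  # first handle: write and flush
    ds.create_tensor("image")
    ds.image.extend(np.ones((2, 8, 8)))
    ds.flush()  # push cached chunks down to the underlying storage

    ds_new = ds_generator()  # second handle: same path, fresh cache
    assert len(ds_new) == 2
    np.testing.assert_array_equal(ds_new.image.numpy(), np.ones((2, 8, 8)))
```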
-@parametrize_all_dataset_storages -def test_hub_dataset_suffix_bug(ds): +def test_hub_dataset_suffix_bug(hub_cloud_ds, hub_cloud_dev_token): # creating dataset with similar name but some suffix removed from end - ds2 = Dataset(ds.path[:-1]) - ds2.delete() + ds = Dataset(hub_cloud_ds.path[:-1], token=hub_cloud_dev_token) + + # need to delete because it's a different path (won't be auto cleaned up) + ds.delete() def test_empty_dataset(): diff --git a/hub/api/tests/test_api_with_compression.py b/hub/api/tests/test_api_with_compression.py index 761f2e263a..78bc072057 100644 --- a/hub/api/tests/test_api_with_compression.py +++ b/hub/api/tests/test_api_with_compression.py @@ -6,11 +6,11 @@ import pytest from hub.api.tensor import Tensor from hub.tests.common import TENSOR_KEY +from hub.tests.dataset_fixtures import enabled_datasets import numpy as np import hub from hub import Dataset -from hub.core.tests.common import parametrize_all_dataset_storages def _populate_compressed_samples(tensor: Tensor, cat_path, flower_path, count=1): @@ -37,7 +37,7 @@ def _populate_compressed_samples(tensor: Tensor, cat_path, flower_path, count=1) return original_compressions -@parametrize_all_dataset_storages +@enabled_datasets def test_populate_compressed_samples(ds: Dataset, cat_path, flower_path): images = ds.create_tensor(TENSOR_KEY, htype="image", sample_compression="png") @@ -55,7 +55,7 @@ def test_populate_compressed_samples(ds: Dataset, cat_path, flower_path): assert images.shape_interval.upper == (5, 900, 900, 4) -@parametrize_all_dataset_storages +@enabled_datasets def test_iterate_compressed_samples(ds: Dataset, cat_path, flower_path): images = ds.create_tensor(TENSOR_KEY, htype="image", sample_compression="png") @@ -82,7 +82,7 @@ def test_iterate_compressed_samples(ds: Dataset, cat_path, flower_path): assert x.shape == expected_shape -@parametrize_all_dataset_storages +@enabled_datasets def test_uncompressed(ds: Dataset): images = ds.create_tensor(TENSOR_KEY, sample_compression=None) diff --git a/hub/api/tests/test_readonly.py b/hub/api/tests/test_readonly.py index 1e73337436..6bd0c361ff 100644 --- a/hub/api/tests/test_readonly.py +++ b/hub/api/tests/test_readonly.py @@ -3,7 +3,7 @@ import numpy as np from hub import Dataset -from hub.util.exceptions import ReadOnlyModeError +from hub.util.exceptions import CouldNotCreateNewDatasetException, ReadOnlyModeError def _assert_readonly_ops(ds, num_samples: int, sample_shape: Tuple[int]): @@ -18,15 +18,18 @@ def _assert_readonly_ops(ds, num_samples: int, sample_shape: Tuple[int]): assert ds.tensor.shape == (num_samples, *sample_shape) -def test_readonly(local_ds): - path = local_ds.path +def test_readonly(local_ds_generator): + ds = local_ds_generator() + ds.create_tensor("tensor") + ds.tensor.append(np.ones((100, 100))) + ds.read_only = True + _assert_readonly_ops(ds, 1, (100, 100)) - local_ds.create_tensor("tensor") - local_ds.tensor.append(np.ones((100, 100))) - local_ds.read_only = True - _assert_readonly_ops(local_ds, 1, (100, 100)) - del local_ds + ds = local_ds_generator() + ds.read_only = True + _assert_readonly_ops(ds, 1, (100, 100)) - local_ds = Dataset(path) - local_ds.read_only = True - _assert_readonly_ops(local_ds, 1, (100, 100)) + +@pytest.mark.xfail(raises=CouldNotCreateNewDatasetException, strict=True) +def test_readonly_doesnt_exist(local_path): + Dataset(local_path, read_only=True) diff --git a/hub/cli/test_cli.py b/hub/cli/test_cli.py index daf121c7a9..aec4a727c5 100644 --- a/hub/cli/test_cli.py +++ b/hub/cli/test_cli.py @@ -5,15 +5,13 
@@ from click.testing import CliRunner from hub.cli.auth import login, logout -from hub.client.utils import has_hub_testing_creds -@pytest.mark.skipif(not has_hub_testing_creds(), reason="requires hub credentials") -def test_cli_auth(): +def test_cli_auth(hub_cloud_dev_credentials): + username, password = hub_cloud_dev_credentials + runner = CliRunner() - username = "testingacc" - password = os.getenv("ACTIVELOOP_HUB_PASSWORD") result = runner.invoke(login, f"-u {username} -p {password}") assert result.exit_code == 0 assert result.output == "Successfully logged in to Activeloop.\n" diff --git a/hub/client/client.py b/hub/client/client.py index 2cba6a9383..8a3defaad1 100644 --- a/hub/client/client.py +++ b/hub/client/client.py @@ -1,7 +1,7 @@ import sys import requests from typing import Optional -from hub.util.exceptions import LoginException +from hub.util.exceptions import InvalidPasswordException, LoginException from hub.client.utils import check_response_status, write_token, read_token from hub.client.config import ( HUB_REST_ENDPOINT, @@ -13,7 +13,6 @@ GET_DATASET_CREDENTIALS_SUFFIX, CREATE_DATASET_SUFFIX, DATASET_SUFFIX, - UPDATE_SUFFIX, ) from hub.client.log import logger import hub @@ -71,6 +70,9 @@ def request( headers (dict, optional): Dictionary of HTTP Headers to send with the request. timeout (float,optional): How many seconds to wait for the server to send data before giving up. + Raises: + InvalidPasswordException: `password` cannot be `None` inside `json`. + Returns: requests.Response: The response received from the server. """ @@ -97,13 +99,18 @@ def request( timeout=timeout, ) + # clearer error than `ServerUnderMaintenence` + if "password" in json and json["password"] is None: + # do NOT pass in the password here. `None` is explicitly typed. 
+ raise InvalidPasswordException("Password cannot be `None`.") + check_response_status(response) return response def endpoint(self): - if hub.client.config.LOCAL: + if hub.client.config.USE_LOCAL_HOST: return HUB_REST_ENDPOINT_LOCAL - if hub.client.config.DEV: + if hub.client.config.USE_DEV_ENVIRONMENT: return HUB_REST_ENDPOINT_DEV return HUB_REST_ENDPOINT diff --git a/hub/client/config.py b/hub/client/config.py index 7d3e687eb8..31c505f85c 100644 --- a/hub/client/config.py +++ b/hub/client/config.py @@ -6,8 +6,8 @@ HUB_REST_ENDPOINT = "https://app.activeloop.ai" HUB_REST_ENDPOINT_DEV = "https://app.dev.activeloop.ai" HUB_REST_ENDPOINT_LOCAL = "http://localhost:5000" -LOCAL = False -DEV = False +USE_LOCAL_HOST = False +USE_DEV_ENVIRONMENT = False GET_TOKEN_SUFFIX = "/api/user/token" REGISTER_USER_SUFFIX = "/api/user/register" diff --git a/hub/client/test_client.py b/hub/client/test_client.py index 051af31689..6f525a771c 100644 --- a/hub/client/test_client.py +++ b/hub/client/test_client.py @@ -1,5 +1,3 @@ -import os - import pytest from hub.client.client import HubBackendClient @@ -7,14 +5,12 @@ write_token, read_token, remove_token, - has_hub_testing_creds, ) -@pytest.mark.skipif(not has_hub_testing_creds(), reason="requires hub credentials") -def test_client_requests(): - username = "testingacc" - password = os.getenv("ACTIVELOOP_HUB_PASSWORD") +def test_client_requests(hub_cloud_dev_credentials): + username, password = hub_cloud_dev_credentials + hub_client = HubBackendClient() hub_client.request_auth_token(username, password) with pytest.raises(Exception): diff --git a/hub/client/utils.py b/hub/client/utils.py index 9c38c563cb..b173169ca7 100644 --- a/hub/client/utils.py +++ b/hub/client/utils.py @@ -48,12 +48,6 @@ def remove_token(): os.remove(TOKEN_FILE_PATH) -def has_hub_testing_creds(): - """Checks if credentials exists""" - env = os.getenv("ACTIVELOOP_HUB_PASSWORD") - return env is not None - - def check_response_status(response: requests.Response): """Check response status and throw corresponding exception on failure.""" code = response.status_code diff --git a/hub/constants.py b/hub/constants.py index 8ccde0a17a..60f76469cb 100644 --- a/hub/constants.py +++ b/hub/constants.py @@ -48,6 +48,21 @@ # caclulate the number of bits to shift right when converting a 128-bit uuid into `ENCODING_DTYPE` UUID_SHIFT_AMOUNT = 128 - (8 * ENCODING_DTYPE(1).itemsize) + +HUB_CLOUD_DEV_USERNAME = "testingacc" + PYTEST_MEMORY_PROVIDER_BASE_ROOT = "mem://hub_pytest" PYTEST_LOCAL_PROVIDER_BASE_ROOT = "/tmp/hub_pytest/" # TODO: may fail for windows PYTEST_S3_PROVIDER_BASE_ROOT = "s3://hub-2.0-tests/" +PYTEST_HUB_CLOUD_PROVIDER_BASE_ROOT = f"hub://{HUB_CLOUD_DEV_USERNAME}/" + +HUB_DEV_PASSWORD_ENVIRONMENT_VARIABLE = "ACTIVELOOP_HUB_PASSWORD" + + +# pytest options +MEMORY_OPT = "--memory-skip" +LOCAL_OPT = "--local" +S3_OPT = "--s3" +HUB_CLOUD_OPT = "--hub-cloud" +S3_PATH_OPT = "--s3-path" +KEEP_STORAGE_OPT = "--keep-storage" diff --git a/hub/core/storage/tests/test_benchmark_storage_provider.py b/hub/core/storage/tests/test_benchmark_storage_provider.py deleted file mode 100644 index 4716ea9ab6..0000000000 --- a/hub/core/storage/tests/test_benchmark_storage_provider.py +++ /dev/null @@ -1,77 +0,0 @@ -import pytest - -from hub.constants import MB -from hub.tests.common_benchmark import ( - parametrize_benchmark_chunk_sizes, - BENCHMARK_CHUNK_SIZES, -) -from hub.core.tests.common import parametrize_all_caches, parametrize_all_storages -from hub.core.storage.tests.test_storage_provider import KEY # type: 
ignore - - -SIMULATED_DATA_SIZES = [128 * MB] - -# caclulate the number of chunks needed for each entry in `SIMULATED_DATA_SIZES` -NUM_CHUNKS = [] -for chunk_size in BENCHMARK_CHUNK_SIZES: - for data_size in SIMULATED_DATA_SIZES: - NUM_CHUNKS.append(data_size // chunk_size) - - -parametrize_benchmark_num_chunks = pytest.mark.parametrize("num_chunks", NUM_CHUNKS) - - -def write_to_files(storage, chunk_size, num_chunks): - chunk = b"1" * chunk_size - for i in range(num_chunks): - storage[f"{KEY}_{i}"] = chunk - storage.flush() - - -def read_from_files(storage, num_chunks): - for i in range(num_chunks): - storage[f"{KEY}_{i}"] - - -@pytest.mark.benchmark(group="storage_without_caches_write") -@parametrize_all_storages -@parametrize_benchmark_chunk_sizes -@parametrize_benchmark_num_chunks -def test_storage_write_speeds(benchmark, storage, chunk_size, num_chunks): - benchmark(write_to_files, storage, chunk_size, num_chunks) - - -@pytest.mark.benchmark(group="storage_with_caches_write") -@parametrize_all_caches -@parametrize_benchmark_chunk_sizes -@parametrize_benchmark_num_chunks -def test_cache_write_speeds(benchmark, storage, chunk_size, num_chunks): - benchmark(write_to_files, storage, chunk_size, num_chunks) - - -@pytest.mark.benchmark(group="storage_without_caches_read") -@parametrize_all_storages -@parametrize_benchmark_chunk_sizes -@parametrize_benchmark_num_chunks -def test_storage_read_speeds(benchmark, storage, chunk_size, num_chunks): - write_to_files(storage, chunk_size, num_chunks) - benchmark(read_from_files, storage, num_chunks) - - -@pytest.mark.benchmark(group="storage_with_caches_read") -@parametrize_all_caches -@parametrize_benchmark_chunk_sizes -@parametrize_benchmark_num_chunks -def test_cache_read_speeds(benchmark, storage, chunk_size, num_chunks): - write_to_files(storage, chunk_size, num_chunks) - benchmark(read_from_files, storage, num_chunks) - - -@pytest.mark.benchmark(group="storage_with_caches_read_write") -@parametrize_all_caches -@parametrize_benchmark_chunk_sizes -@parametrize_benchmark_num_chunks -def test_full_cache_read_speeds(benchmark, storage, chunk_size, num_chunks): - write_to_files(storage, chunk_size, num_chunks) - read_from_files(storage, num_chunks) - benchmark(read_from_files, storage, num_chunks) diff --git a/hub/core/storage/tests/test_readonly.py b/hub/core/storage/tests/test_readonly.py index e6d2aee019..c5005f4da8 100644 --- a/hub/core/storage/tests/test_readonly.py +++ b/hub/core/storage/tests/test_readonly.py @@ -1,39 +1,32 @@ import pytest -from hub.core.tests.common import parametrize_all_storages_and_caches -from hub.util.exceptions import ReadOnlyModeError +from hub.util.exceptions import CouldNotCreateNewDatasetException, ReadOnlyModeError from hub import Dataset +from hub.tests.storage_fixtures import enabled_storages @pytest.mark.xfail(raises=ReadOnlyModeError, strict=True) -@parametrize_all_storages_and_caches +@enabled_storages def test_readonly_del(storage): storage.enable_readonly() del storage["help!"] @pytest.mark.xfail(raises=ReadOnlyModeError, strict=True) -@parametrize_all_storages_and_caches +@enabled_storages def test_readonly_set(storage): storage.enable_readonly() storage["help! 
im stuck in a storage provider!"] = "salvation" @pytest.mark.xfail(raises=ReadOnlyModeError, strict=True) -@parametrize_all_storages_and_caches +@enabled_storages def test_readonly_clear(storage): storage.enable_readonly() storage.clear() @pytest.mark.xfail(raises=ReadOnlyModeError, strict=True) -@parametrize_all_storages_and_caches +@enabled_storages def test_readonly_flush(storage): storage.enable_readonly() storage.flush() - - -@pytest.mark.xfail(raises=ReadOnlyModeError, strict=True) -@parametrize_all_storages_and_caches -def test_readonly_ds_create_tensor(storage): - ds = Dataset(read_only=True, storage=storage) - ds.create_tensor("test") diff --git a/hub/core/storage/tests/test_storage_provider.py b/hub/core/storage/tests/test_storage_provider.py index e4931edf00..93fc8dd25f 100644 --- a/hub/core/storage/tests/test_storage_provider.py +++ b/hub/core/storage/tests/test_storage_provider.py @@ -1,10 +1,12 @@ +from hub.tests.storage_fixtures import enabled_storages +from hub.tests.cache_fixtures import enabled_cache_chains import pytest from click.testing import CliRunner from hub.constants import MB -from hub.core.tests.common import parametrize_all_caches, parametrize_all_storages import pickle + KEY = "file" @@ -113,18 +115,18 @@ def check_cache(cache): check_cache_state(cache, expected_state=[set(), set(), 0, 0, 0, 0]) -@parametrize_all_storages +@enabled_storages def test_storage_provider(storage): check_storage_provider(storage) -@parametrize_all_caches -def test_cache(storage): - check_storage_provider(storage) - check_cache(storage) +@enabled_cache_chains +def test_cache(cache_chain): + check_storage_provider(cache_chain) + check_cache(cache_chain) -@parametrize_all_storages +@enabled_storages def test_pickling(storage): with CliRunner().isolated_filesystem(): FILE_1 = f"{KEY}_1" diff --git a/hub/core/tests/common.py b/hub/core/tests/common.py deleted file mode 100644 index 0f9e8a2be8..0000000000 --- a/hub/core/tests/common.py +++ /dev/null @@ -1,43 +0,0 @@ -import pytest - -import pytest - -STORAGE_FIXTURE_NAME = "storage" -DATASET_FIXTURE_NAME = "ds" - -MEMORY = "memory" -LOCAL = "local" -S3 = "s3" - -ALL_PROVIDERS = [MEMORY, LOCAL, S3] - -_ALL_CACHES_TUPLES = [(MEMORY, LOCAL), (MEMORY, S3), (LOCAL, S3), (MEMORY, LOCAL, S3)] -ALL_CACHES = list(map(lambda i: ",".join(i), _ALL_CACHES_TUPLES)) - -parametrize_all_storages = pytest.mark.parametrize( - STORAGE_FIXTURE_NAME, - ALL_PROVIDERS, - indirect=True, -) - -parametrize_all_caches = pytest.mark.parametrize( - STORAGE_FIXTURE_NAME, - ALL_CACHES, - indirect=True, -) - -parametrize_all_storages_and_caches = pytest.mark.parametrize( - STORAGE_FIXTURE_NAME, - ALL_PROVIDERS + ALL_CACHES, - indirect=True, -) - -parametrize_all_dataset_storages = pytest.mark.parametrize( - DATASET_FIXTURE_NAME, ALL_PROVIDERS, indirect=True -) - -parametrize_all_dataset_storages_and_caches = pytest.mark.parametrize( - STORAGE_FIXTURE_NAME, - ALL_PROVIDERS + ALL_CACHES, # type: ignore - indirect=True, -) diff --git a/hub/integrations/tests/test_pytorch.py b/hub/integrations/tests/test_pytorch.py index d9a08db187..c650c67761 100644 --- a/hub/integrations/tests/test_pytorch.py +++ b/hub/integrations/tests/test_pytorch.py @@ -1,4 +1,4 @@ -import sys +from hub.util.remove_cache import get_base_storage import pickle import pytest from hub.util.remove_cache import get_base_storage @@ -9,7 +9,7 @@ from hub.integrations.pytorch.pytorch_old import dataset_to_pytorch from hub.util.check_installation import requires_torch -from hub.core.tests.common import 
parametrize_all_dataset_storages +from hub.tests.dataset_fixtures import enabled_datasets def to_tuple(sample): @@ -17,7 +17,7 @@ def to_tuple(sample): @requires_torch -@parametrize_all_dataset_storages +@enabled_datasets def test_pytorch_small(ds): with ds: ds.create_tensor("image") @@ -78,7 +78,7 @@ def test_pytorch_small(ds): @requires_torch -@parametrize_all_dataset_storages +@enabled_datasets def test_pytorch_transform(ds): with ds: ds.create_tensor("image") @@ -103,7 +103,7 @@ def test_pytorch_transform(ds): @requires_torch -@parametrize_all_dataset_storages +@enabled_datasets def test_pytorch_with_compression(ds: Dataset): # TODO: chunk-wise compression for labels (right now they are uncompressed) with ds: @@ -130,7 +130,7 @@ def test_pytorch_with_compression(ds: Dataset): @requires_torch -@parametrize_all_dataset_storages +@enabled_datasets def test_pytorch_small_old(ds): with ds: ds.create_tensor("image") @@ -160,7 +160,7 @@ def test_pytorch_small_old(ds): @requires_torch -@parametrize_all_dataset_storages +@enabled_datasets def test_custom_tensor_order(ds): with ds: tensors = ["a", "b", "c", "d"] diff --git a/hub/integrations/tests/test_tensorflow.py b/hub/integrations/tests/test_tensorflow.py index 90b77e3329..7a81f7c84c 100644 --- a/hub/integrations/tests/test_tensorflow.py +++ b/hub/integrations/tests/test_tensorflow.py @@ -34,4 +34,3 @@ def test_tensorflow_small(local_ds): # converting tf Tensors to numpy np.testing.assert_array_equal(batch["image"].numpy(), i * np.ones((300, 300))) np.testing.assert_array_equal(batch["image2"].numpy(), i * np.ones((100, 100))) - local_ds.delete() diff --git a/hub/tests/cache_fixtures.py b/hub/tests/cache_fixtures.py new file mode 100644 index 0000000000..2924548568 --- /dev/null +++ b/hub/tests/cache_fixtures.py @@ -0,0 +1,53 @@ +from hub.util.cache_chain import get_cache_chain +from hub.constants import MIN_FIRST_CACHE_SIZE, MIN_SECOND_CACHE_SIZE +import pytest + +_MEMORY = "memory_storage" +_LOCAL = "local_storage" +_S3 = "s3_storage" +_HUB_CLOUD = "hub_cloud_storage" + +CACHE_CHAINS = [ + (_MEMORY, _LOCAL), + (_MEMORY, _S3), + (_MEMORY, _HUB_CLOUD), + (_LOCAL, _S3), + (_LOCAL, _HUB_CLOUD), + (_MEMORY, _LOCAL, _S3), + (_MEMORY, _LOCAL, _HUB_CLOUD), +] +CACHE_CHAINS = list(map(lambda i: ",".join(i), CACHE_CHAINS)) # type: ignore + + +enabled_cache_chains = pytest.mark.parametrize( + "cache_chain", + CACHE_CHAINS, + indirect=True, +) + + +@pytest.fixture +def cache_chain(request): + requested_storages = request.param.split(",") + + storages = [] + cache_sizes = [] + + # will automatically skip if a storage is not enabled + for requested_storage in requested_storages: + storage = request.getfixturevalue(requested_storage) + storages.append(storage) + + if len(cache_sizes) == 0: + cache_sizes.append(MIN_FIRST_CACHE_SIZE) + elif len(cache_sizes) == 1: + cache_sizes.append(MIN_SECOND_CACHE_SIZE) + + if len(storages) == len(cache_sizes): + cache_sizes.pop() + + assert ( + len(storages) == len(cache_sizes) + 1 + ), f"Invalid test composition. 
{len(storages)} != {len(cache_sizes)} - 1" + + return get_cache_chain(storages, cache_sizes) diff --git a/hub/tests/client_fixtures.py b/hub/tests/client_fixtures.py new file mode 100644 index 0000000000..108b68776f --- /dev/null +++ b/hub/tests/client_fixtures.py @@ -0,0 +1,34 @@ +from hub.constants import ( + HUB_CLOUD_DEV_USERNAME, + HUB_CLOUD_OPT, + HUB_DEV_PASSWORD_ENVIRONMENT_VARIABLE, +) +from hub.tests.common import is_opt_true +import os +import pytest +from hub.client.client import HubBackendClient + + +@pytest.fixture(scope="session") +def hub_cloud_dev_credentials(request): + if not is_opt_true(request, HUB_CLOUD_OPT): + pytest.skip() + + # TODO: use dev environment + + password = os.getenv(HUB_DEV_PASSWORD_ENVIRONMENT_VARIABLE) + + assert ( + password is not None + ), f"Hub dev password was not found in the environment variable '{HUB_DEV_PASSWORD_ENVIRONMENT_VARIABLE}'. This is necessary for testing hub cloud datasets." + + return HUB_CLOUD_DEV_USERNAME, password + + +@pytest.fixture(scope="session") +def hub_cloud_dev_token(hub_cloud_dev_credentials): + username, password = hub_cloud_dev_credentials + + client = HubBackendClient() + token = client.request_auth_token(username, password) + return token diff --git a/hub/tests/common.py b/hub/tests/common.py index 457a237a2a..f8168411c7 100644 --- a/hub/tests/common.py +++ b/hub/tests/common.py @@ -1,10 +1,9 @@ from PIL import Image, UnidentifiedImageError # type: ignore from io import BytesIO -from hub.api.tensor import Tensor import os import pathlib -from typing import Optional, Sequence, Tuple, List -from uuid import uuid1 +from typing import List, Optional +from uuid import uuid4 import numpy as np import posixpath @@ -12,7 +11,7 @@ from hub.constants import KB, MB -SESSION_ID = str(uuid1()) +SESSION_ID = str(uuid4())[:4] # 4 ascii chars should be sufficient _THIS_FILE = pathlib.Path(__file__).parent.absolute() TENSOR_KEY = "tensor" @@ -54,42 +53,6 @@ def get_dummy_data_path(subpath: str = ""): return os.path.join(_THIS_FILE, "dummy_data" + os.sep, subpath) -def get_random_array(shape: Tuple[int], dtype: str) -> np.ndarray: - dtype = dtype.lower() - - if "int" in dtype: - low = np.iinfo(dtype).min - high = np.iinfo(dtype).max - return np.random.randint(low=low, high=high, size=shape, dtype=dtype) - - if "float" in dtype: - # `low`/`high` have to be `float16` instead of `dtype` because `np.random.uniform` only supports `float16` - low = np.finfo("float16").min - high = np.finfo("float16").max - return np.random.uniform(low=low, high=high, size=shape).astype(dtype) - - if "bool" in dtype: - a = np.random.uniform(size=shape) - return a > 0.5 - - raise ValueError(f"Dtype '{dtype}' not supported.") - - -@parametrize_dtypes -@pytest.mark.parametrize( - SHAPE_PARAM, - ( - (100, 100), - (1,), - (1, 1, 1, 1, 1), - ), -) -def test_get_random_array(shape: Tuple[int], dtype: str): - array = get_random_array(shape, dtype) - assert array.shape == shape - assert array.dtype == dtype - - def get_actual_compression_from_buffer(buffer: memoryview) -> Optional[str]: """Helpful for checking if actual compression matches expected.""" @@ -106,3 +69,8 @@ def assert_array_lists_equal(l1: List[np.ndarray], l2: List[np.ndarray]): """Assert that two lists of numpy arrays are equal""" for idx, (a1, a2) in enumerate(zip(l1, l2)): np.testing.assert_array_equal(a1, a2, err_msg=f"Array mismatch at index {idx}") + + +def is_opt_true(request, opt) -> bool: + """Returns if the pytest option `opt` is True. 
Assumes `opt` is a boolean value.""" + return request.config.getoption(opt) diff --git a/hub/tests/common_benchmark.py b/hub/tests/common_benchmark.py deleted file mode 100644 index 10c020abb7..0000000000 --- a/hub/tests/common_benchmark.py +++ /dev/null @@ -1,36 +0,0 @@ -import pytest - -from hub.constants import MB -from hub.tests.common import ( - CHUNK_SIZE_PARAM, - SHAPE_PARAM, - NUM_BATCHES_PARAM, - DTYPE_PARAM, -) - -# benchmark parametrizations -BENCHMARK_NUM_BATCHES = (1,) -BENCHMARK_DTYPES = ("int64",) -BENCHMARK_CHUNK_SIZES = (16 * MB,) - -FULL_BENCHMARK_BATCHED_SHAPES = ( - # with int64/float64 = ~1GB - (840, 224, 224, 3), -) - -BENCHMARK_BATCHED_SHAPES = ((5, 224, 224, 3),) - -# benchmark parametrize decorators -parametrize_benchmark_chunk_sizes = pytest.mark.parametrize( - CHUNK_SIZE_PARAM, BENCHMARK_CHUNK_SIZES -) -parametrize_benchmark_dtypes = pytest.mark.parametrize(DTYPE_PARAM, BENCHMARK_DTYPES) -parametrize_benchmark_shapes = pytest.mark.parametrize( - SHAPE_PARAM, BENCHMARK_BATCHED_SHAPES -) -parametrize_full_benchmark_shapes = pytest.mark.parametrize( - SHAPE_PARAM, FULL_BENCHMARK_BATCHED_SHAPES -) -parametrize_benchmark_num_batches = pytest.mark.parametrize( - NUM_BATCHES_PARAM, BENCHMARK_NUM_BATCHES -) diff --git a/hub/tests/dataset_fixtures.py b/hub/tests/dataset_fixtures.py new file mode 100644 index 0000000000..3e7a7a63fd --- /dev/null +++ b/hub/tests/dataset_fixtures.py @@ -0,0 +1,71 @@ +import pytest +from hub import Dataset + + +enabled_datasets = pytest.mark.parametrize( + "ds", + ["memory_ds", "local_ds", "s3_ds", "hub_cloud_ds"], + indirect=True, +) + +enabled_persistent_dataset_generators = pytest.mark.parametrize( + "ds_generator", + ["local_ds_generator", "s3_ds_generator", "hub_cloud_ds_generator"], + indirect=True, +) + + +@pytest.fixture +def memory_ds(memory_path): + return Dataset(memory_path) + + +@pytest.fixture +def local_ds(local_ds_generator): + return local_ds_generator() + + +@pytest.fixture +def local_ds_generator(local_path): + def generate_local_ds(): + return Dataset(local_path) + + return generate_local_ds + + +@pytest.fixture +def s3_ds(s3_ds_generator): + return s3_ds_generator() + + +@pytest.fixture +def s3_ds_generator(s3_path): + def generate_s3_ds(): + return Dataset(s3_path) + + return generate_s3_ds + + +@pytest.fixture +def hub_cloud_ds(hub_cloud_ds_generator): + return hub_cloud_ds_generator() + + +@pytest.fixture +def hub_cloud_ds_generator(hub_cloud_path, hub_cloud_dev_token): + def generate_hub_cloud_ds(): + return Dataset(hub_cloud_path, token=hub_cloud_dev_token) + + return generate_hub_cloud_ds + + +@pytest.fixture +def ds(request): + """Used with parametrize to use all enabled dataset fixtures.""" + return request.getfixturevalue(request.param) + + +@pytest.fixture +def ds_generator(request): + """Used with parametrize to use all enabled persistent dataset generator fixtures.""" + return request.getfixturevalue(request.param) diff --git a/hub/tests/path_fixtures.py b/hub/tests/path_fixtures.py new file mode 100644 index 0000000000..08a429be22 --- /dev/null +++ b/hub/tests/path_fixtures.py @@ -0,0 +1,154 @@ +from hub.util.get_storage_provider import storage_provider_from_hub_path +from hub.core.storage.s3 import S3Provider +from hub.core.storage.local import LocalProvider +import os +from conftest import S3_PATH_OPT +from hub.constants import ( + HUB_CLOUD_OPT, + KEEP_STORAGE_OPT, + LOCAL_OPT, + MEMORY_OPT, + PYTEST_HUB_CLOUD_PROVIDER_BASE_ROOT, + PYTEST_LOCAL_PROVIDER_BASE_ROOT, + PYTEST_MEMORY_PROVIDER_BASE_ROOT, + 
S3_OPT, +) +import posixpath +from hub.tests.common import ( + SESSION_ID, + current_test_name, + get_dummy_data_path, + is_opt_true, +) +import pytest + +MEMORY = "memory" +LOCAL = "local" +S3 = "s3" +HUB_CLOUD = "hub_cloud" + + +def _get_path_composition_configs(request): + return { + MEMORY: { + "base_root": PYTEST_MEMORY_PROVIDER_BASE_ROOT, + "use_id": False, + "is_id_prefix": False, + # if is_id_prefix (and use_id=True), the session id comes before test name, otherwise it is reversed + "use_underscores": False, + }, + LOCAL: { + "base_root": PYTEST_LOCAL_PROVIDER_BASE_ROOT, + "use_id": False, + "is_id_prefix": False, + "use_underscores": False, + }, + S3: { + "base_root": request.config.getoption(S3_PATH_OPT), + "use_id": True, + "is_id_prefix": True, + "use_underscores": False, + }, + HUB_CLOUD: { + "base_root": PYTEST_HUB_CLOUD_PROVIDER_BASE_ROOT, + "use_id": True, + "is_id_prefix": True, + "use_underscores": True, + }, + } + + +def _get_storage_path( + request, storage_name, with_current_test_name=True, info_override={} +): + info = _get_path_composition_configs(request)[storage_name] + info.update(info_override) + + root = info["base_root"] + + path = "" + if with_current_test_name: + path = current_test_name() + + if info["use_id"]: + if info["is_id_prefix"]: + path = posixpath.join(SESSION_ID, path) + else: + path = posixpath.join(path, SESSION_ID) + + if info["use_underscores"]: + path = path.replace("/", "_") + + root = posixpath.join(root, path) + return root + + +@pytest.fixture +def memory_path(request): + if is_opt_true(request, MEMORY_OPT): + pytest.skip() + return + + # no need to clear memory paths + return _get_storage_path(request, MEMORY) + + +@pytest.fixture +def local_path(request): + if not is_opt_true(request, LOCAL_OPT): + pytest.skip() + return + + path = _get_storage_path(request, LOCAL) + + yield path + + # clear storage unless flagged otherwise + if not is_opt_true(request, KEEP_STORAGE_OPT): + LocalProvider(path).clear() + + +@pytest.fixture +def s3_path(request): + if not is_opt_true(request, S3_OPT): + pytest.skip() + return + + path = _get_storage_path(request, S3) + + yield path + + # clear storage unless flagged otherwise + if not is_opt_true(request, KEEP_STORAGE_OPT): + S3Provider(path).clear() + + +@pytest.fixture +def hub_cloud_path(request, hub_cloud_dev_token): + if not is_opt_true(request, HUB_CLOUD_OPT): + pytest.skip() + return + + path = _get_storage_path(request, HUB_CLOUD) + + yield path + + # clear storage unless flagged otherwise + if not is_opt_true(request, KEEP_STORAGE_OPT): + storage_provider_from_hub_path(path, token=hub_cloud_dev_token).clear() + + +@pytest.fixture +def cat_path(): + """Path to a cat image in the dummy data folder.""" + + path = get_dummy_data_path("compressed_images") + return os.path.join(path, "cat.jpeg") + + +@pytest.fixture +def flower_path(): + """Path to a flower image in the dummy data folder.""" + + path = get_dummy_data_path("compressed_images") + return os.path.join(path, "flower.png") diff --git a/hub/tests/storage_fixtures.py b/hub/tests/storage_fixtures.py new file mode 100644 index 0000000000..d685b492b3 --- /dev/null +++ b/hub/tests/storage_fixtures.py @@ -0,0 +1,38 @@ +from hub.util.get_storage_provider import storage_provider_from_hub_path +from hub.core.storage.s3 import S3Provider +from hub.core.storage.local import LocalProvider +from hub.core.storage.memory import MemoryProvider +import pytest + + +enabled_storages = pytest.mark.parametrize( + "storage", + ["memory_storage", "local_storage", 
"s3_storage", "hub_cloud_storage"], + indirect=True, +) + + +@pytest.fixture +def memory_storage(memory_path): + return MemoryProvider(memory_path) + + +@pytest.fixture +def local_storage(local_path): + return LocalProvider(local_path) + + +@pytest.fixture +def s3_storage(s3_path): + return S3Provider(s3_path) + + +@pytest.fixture +def hub_cloud_storage(hub_cloud_path, hub_cloud_dev_token): + return storage_provider_from_hub_path(hub_cloud_path, token=hub_cloud_dev_token) + + +@pytest.fixture +def storage(request): + """Used with parametrize to use all enabled storage fixtures.""" + return request.getfixturevalue(request.param) diff --git a/hub/util/check_installation.py b/hub/util/check_installation.py index 0ccb8a5c86..d098a4e22f 100644 --- a/hub/util/check_installation.py +++ b/hub/util/check_installation.py @@ -21,7 +21,7 @@ def tensorflow_installed(): return True -def tfds_installed(): +def _tfds_installed(): try: import tensorflow_datasets # type: ignore @@ -40,5 +40,5 @@ def tfds_installed(): ) requires_tfds = pytest.mark.skipif( - not tfds_installed(), reason="requires tensorflow_datasets to be installed" + not _tfds_installed(), reason="requires tensorflow_datasets to be installed" ) diff --git a/hub/util/exceptions.py b/hub/util/exceptions.py index bab69f6911..d226f46733 100644 --- a/hub/util/exceptions.py +++ b/hub/util/exceptions.py @@ -140,6 +140,28 @@ def __init__( super().__init__(message) +class InvalidPasswordException(AuthorizationException): + def __init__( + self, + message="The password you provided was invalid.", + ): + super().__init__(message) + + +class CouldNotCreateNewDatasetException(AuthorizationException): + def __init__( + self, + path: str, + ): + + extra = "" + if path.startswith("hub://"): + extra = "Since the path is a `hub://` dataset, if you believe you should have write permissions, try running `activeloop login`." + + message = f"Dataset at '{path}' doesn't exist, and you have no permissions to create one there. Maybe a typo? {extra}" + super().__init__(message) + + class ResourceNotFoundException(Exception): def __init__( self, diff --git a/hub/util/tests/test_from_tfds.py b/hub/util/tests/test_from_tfds.py index f7e3de539c..5f191b41db 100644 --- a/hub/util/tests/test_from_tfds.py +++ b/hub/util/tests/test_from_tfds.py @@ -1,13 +1,12 @@ import numpy as np -import pytest -from hub.core.tests.common import parametrize_all_dataset_storages -from hub.util.check_installation import tfds_installed, requires_tfds, requires_tensorflow # type: ignore +from hub.util.check_installation import requires_tfds, requires_tensorflow # type: ignore +from hub.tests.dataset_fixtures import enabled_datasets @requires_tfds def test_from_tfds_to_path(local_storage): - from hub.util.from_tfds import from_tfds, from_tfds_to_path # type: ignore + from hub.util.from_tfds import from_tfds_to_path # type: ignore hub_ds = from_tfds_to_path( tfds_dataset_name="mnist", @@ -20,7 +19,7 @@ def test_from_tfds_to_path(local_storage): @requires_tensorflow @requires_tfds -@parametrize_all_dataset_storages +@enabled_datasets def test_from_tfds(ds): import tensorflow_datasets as tfds # type: ignore from hub.util.from_tfds import from_tfds, from_tfds_to_path # type: ignore diff --git a/setup.cfg b/setup.cfg index dacc7f70d9..8de5c249a9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,7 +1,5 @@ [tool:pytest] addopts = -s -markers = - full_benchmark: mark a benchmark that will take a long time as "full". in order to run this benchmark, the `--full-benchmarks` option is required. 
[mypy] warn_redundant_casts = True
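Taken together, the new fixture modules make cache-chain coverage declarative: the `cache_chain` fixture pulls each member storage by fixture name, so a chain is skipped automatically whenever one of its storages is not enabled. A hedged round-trip sketch against the chain's `StorageProvider` interface, mirroring how `test_cache` in this patch exercises it (the test name and key are illustrative):

```python
from hub.tests.cache_fixtures import enabled_cache_chains


@enabled_cache_chains
def test_cache_chain_roundtrip(cache_chain):
    # a cache chain exposes the same mapping interface as a single provider;
    # the last provider in the chain is the actual (persistent) storage
    cache_chain["file"] = b"1234"
    assert cache_chain["file"] == b"1234"

    # flush propagates anything still held in the cache layers downward
    cache_chain.flush()
```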