changed get_object request based on discussion

GitHub Actions / JUnit Test Report failed May 9, 2024 in 0s

2998 tests run, 1671 passed, 1319 skipped, 8 failed.
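
All eight failures share one root cause: the Activeloop backend answers the dataset-deletion request with a 400 whose description is the Python error "object generator can't be used in 'await' expression". That is the exact TypeError CPython raises when a plain generator object, rather than a coroutine or other awaitable, is passed to await — consistent with this commit changing the get_object request, if some server-side caller now awaits a handler that returns a generator. A minimal sketch of the failure mode (the names below are hypothetical, not the actual server code):

    import asyncio

    def get_object(key):
        # A function body containing `yield` returns a generator object,
        # which is not awaitable.
        yield b"chunk"

    async def broken_handler():
        # TypeError: object generator can't be used in 'await' expression
        return await get_object("datasets/entry")

    async def fixed_handler():
        # One possible fix: consume the generator instead of awaiting it
        # (or make get_object a coroutine / async generator).
        return b"".join(get_object("datasets/entry"))

    if __name__ == "__main__":
        print(asyncio.run(fixed_handler()))  # b'chunk'
        # asyncio.run(broken_handler())      # reproduces the error below

Each traceback below is that TypeError relayed verbatim in the 400 response body and raised client-side as BadRequestException.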

Annotations

Check failure on line 130 in deeplake/core/vectorstore/deep_memory/test_deepmemory.py


test_deepmemory.test_deepmemory_train_and_cancel

deeplake.util.exceptions.DatasetHandlerError: Path hub://testingacc2/tmpa8bf_test_deepmemory_test_deepmemory_train_and_cancel_copy is empty or does not exist. Cannot delete.
Raw output
path = 'hub://testingacc2/tmpa8bf_test_deepmemory_test_deepmemory_train_and_cancel_copy'
force = True, large_ok = True, creds = {}
token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
verbose = False

    @staticmethod
    @spinner
    def delete(
        path: Union[str, pathlib.Path],
        force: bool = False,
        large_ok: bool = False,
        creds: Optional[Union[dict, str]] = None,
        token: Optional[str] = None,
        verbose: bool = False,
    ) -> None:
        """Deletes a dataset at a given path.
    
        Args:
            path (str, pathlib.Path): The path to the dataset to be deleted.
            force (bool): Delete data regardless of whether
                it looks like a deeplake dataset. All data at the path will be removed if set to ``True``.
            large_ok (bool): Delete datasets larger than 1GB. Disabled by default.
            creds (dict, str, optional): The string ``ENV`` or a dictionary containing credentials used to access the dataset at the path.
                - If 'aws_access_key_id', 'aws_secret_access_key', 'aws_session_token' are present, these take precedence over credentials present in the environment or in the credentials file. Currently only works with S3 paths.
                - It supports 'aws_access_key_id', 'aws_secret_access_key', 'aws_session_token', 'endpoint_url', 'aws_region', 'profile_name' as keys.
                - If 'ENV' is passed, credentials are fetched from the environment variables. This is also the case when creds is not passed for cloud datasets. For datasets connected to hub cloud, specifying 'ENV' will override the credentials fetched from Activeloop and use local ones.
            token (str, optional): Activeloop token, used for fetching credentials to the dataset at path if it is a Deep Lake dataset. This is optional, tokens are normally autogenerated.
            verbose (bool): If True, logs will be printed. Defaults to ``False``.
    
        Raises:
            DatasetHandlerError: If a Dataset does not exist at the given path and ``force = False``.
            UserNotLoggedInException: When user is not authenticated.
            NotImplementedError: When attempting to delete a managed view.
            ValueError: If a version is specified in the path.
    
        Warning:
            This is an irreversible operation. Data once deleted cannot be recovered.
        """
        path, address = process_dataset_path(path)
    
        if address:
            raise ValueError(
                "deeplake.delete does not accept version address in the dataset path."
            )
    
        if creds is None:
            creds = {}
    
        feature_report_path(
            path, "delete", {"Force": force, "Large_OK": large_ok}, token=token
        )
    
        try:
            qtokens = ["/.queries/", "\\.queries\\"]
            for qt in qtokens:
                if qt in path:
                    raise NotImplementedError(
                        "Deleting managed views by path is not supported. Load the source dataset and do `ds.delete_view(id)` instead."
                    )
            try:
                ds = deeplake.load(path, verbose=False, token=token, creds=creds)
            except UserNotLoggedInException:
                raise UserNotLoggedInException from None
    
>           ds.delete(large_ok=large_ok)

deeplake/api/dataset.py:905: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
deeplake/core/dataset/deeplake_cloud_dataset.py:246: in delete
    self.client.delete_dataset_entry(self.org_id, self.ds_name)
deeplake/client/client.py:306: in delete_dataset_entry
    self.request(
deeplake/client/client.py:148: in request
    check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

response = <Response [400]>

    def check_response_status(response: requests.Response):
        """Check response status and throw corresponding exception on failure."""
        code = response.status_code
        if code >= 200 and code < 300:
            return
    
        try:
            message = response.json()["description"]
        except Exception:
            message = " "
    
        if code == 400:
>           raise BadRequestException(message)
E           deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
E           object generator can't be used in 'await' expression

deeplake/client/utils.py:56: BadRequestException

During handling of the above exception, another exception occurred:

capsys = <_pytest.capture.CaptureFixture object at 0x7ffaeaec3ac0>
corpus_query_relevances_copy = ('hub://testingacc2/tmpa8bf_test_deepmemory_test_deepmemory_train_and_cancel', ['0-dimensional biomaterials lack induc...]], [['32587939', 1]], ...], 'hub://testingacc2/tmpa8bf_test_deepmemory_test_deepmemory_train_and_cancel_eval_queries')
hub_cloud_dev_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'

    @pytest.mark.slow
    @pytest.mark.flaky(reruns=3)
    @pytest.mark.skipif(sys.platform == "win32", reason="Does not run on Windows")
    def test_deepmemory_train_and_cancel(
        capsys,
        corpus_query_relevances_copy,
        hub_cloud_dev_token,
    ):
        corpus, queries, relevances, _ = corpus_query_relevances_copy
    
        db = VectorStore(
            path=corpus,
            runtime={"tensor_db": True},
            token=hub_cloud_dev_token,
        )
    
        with pytest.raises(ValueError):
            # When embedding_function is provided neither in the constructor nor in the train method
            job_id = db.deep_memory.train(
                queries=queries,
                relevance=relevances,
            )
    
        job_id = db.deep_memory.train(
            queries=queries,
            relevance=relevances,
            embedding_function=embedding_fn,
        )
    
        # cancelling right after starting the job
        cancelled = db.deep_memory.cancel(job_id)
        assert cancelled == True
    
        # deleting the job
        deleted = db.deep_memory.delete(job_id)
        assert deleted == True
    
        # when embedding function is provided in the constructor
        deeplake.deepcopy(
            corpus,
            corpus + "_copy",
            runtime={"tensor_db": True},
            token=hub_cloud_dev_token,
        )
    
        db = VectorStore(
            path=corpus + "_copy",
            runtime={"tensor_db": True},
            token=hub_cloud_dev_token,
            embedding_function=DummyEmbedder,
        )
    
        job_id = db.deep_memory.train(
            queries=queries,
            relevance=relevances,
            embedding_function=embedding_fn,
        )
    
        # TODO: Investigate why it is flaky
        # # cancelling right after starting the job
        # cancelled = db.deep_memory.cancel(job_id)
        # assert cancelled == True
    
        # # deleting the job
        # deleted = db.deep_memory.delete(job_id)
        # assert deleted == True
    
        # cancelled = db.deep_memory.cancel("non-existent-job-id")
        # out_str = capsys.readouterr()
        # error_str = (
        #     "Job with job_id='non-existent-job-id' was not cancelled!\n "
        #     "Error: Entity non-existent-job-id does not exist.\n"
        # )
        # assert cancelled == False
        # assert out_str.out == error_str
    
>       deeplake.delete(
            corpus + "_copy", force=True, large_ok=True, token=hub_cloud_dev_token
        )

deeplake/core/vectorstore/deep_memory/test_deepmemory.py:130: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
deeplake/util/spinner.py:151: in inner
    return func(*args, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

path = 'hub://testingacc2/tmpa8bf_test_deepmemory_test_deepmemory_train_and_cancel_copy'
force = True, large_ok = True, creds = {}
token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
verbose = False

    @staticmethod
    @spinner
    def delete(
        path: Union[str, pathlib.Path],
        force: bool = False,
        large_ok: bool = False,
        creds: Optional[Union[dict, str]] = None,
        token: Optional[str] = None,
        verbose: bool = False,
    ) -> None:
        """Deletes a dataset at a given path.
    
        Args:
            path (str, pathlib.Path): The path to the dataset to be deleted.
            force (bool): Delete data regardless of whether
                it looks like a deeplake dataset. All data at the path will be removed if set to ``True``.
            large_ok (bool): Delete datasets larger than 1GB. Disabled by default.
            creds (dict, str, optional): The string ``ENV`` or a dictionary containing credentials used to access the dataset at the path.
                - If 'aws_access_key_id', 'aws_secret_access_key', 'aws_session_token' are present, these take precedence over credentials present in the environment or in the credentials file. Currently only works with S3 paths.
                - It supports 'aws_access_key_id', 'aws_secret_access_key', 'aws_session_token', 'endpoint_url', 'aws_region', 'profile_name' as keys.
                - If 'ENV' is passed, credentials are fetched from the environment variables. This is also the case when creds is not passed for cloud datasets. For datasets connected to hub cloud, specifying 'ENV' will override the credentials fetched from Activeloop and use local ones.
            token (str, optional): Activeloop token, used for fetching credentials to the dataset at path if it is a Deep Lake dataset. This is optional, tokens are normally autogenerated.
            verbose (bool): If True, logs will be printed. Defaults to ``False``.
    
        Raises:
            DatasetHandlerError: If a Dataset does not exist at the given path and ``force = False``.
            UserNotLoggedInException: When user is not authenticated.
            NotImplementedError: When attempting to delete a managed view.
            ValueError: If a version is specified in the path.
    
        Warning:
            This is an irreversible operation. Data once deleted cannot be recovered.
        """
        path, address = process_dataset_path(path)
    
        if address:
            raise ValueError(
                "deeplake.delete does not accept version address in the dataset path."
            )
    
        if creds is None:
            creds = {}
    
        feature_report_path(
            path, "delete", {"Force": force, "Large_OK": large_ok}, token=token
        )
    
        try:
            qtokens = ["/.queries/", "\\.queries\\"]
            for qt in qtokens:
                if qt in path:
                    raise NotImplementedError(
                        "Deleting managed views by path is not supported. Load the source dataset and do `ds.delete_view(id)` instead."
                    )
            try:
                ds = deeplake.load(path, verbose=False, token=token, creds=creds)
            except UserNotLoggedInException:
                raise UserNotLoggedInException from None
    
            ds.delete(large_ok=large_ok)
            if verbose:
                logger.info(f"{path} dataset deleted successfully.")
        except Exception as e:
            if force:
                base_storage = storage_provider_from_path(
                    path=path,
                    creds=creds,
                    read_only=False,
                    token=token,
                )
                if len(base_storage) == 0:
>                   raise DatasetHandlerError(
                        f"Path {path} is empty or does not exist. Cannot delete."
                    )
E                   deeplake.util.exceptions.DatasetHandlerError: Path hub://testingacc2/tmpa8bf_test_deepmemory_test_deepmemory_train_and_cancel_copy is empty or does not exist. Cannot delete.

deeplake/api/dataset.py:917: DatasetHandlerError
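
Note that this first failure reports DatasetHandlerError rather than the underlying 400: because force=True, delete's except block probes the storage, finds it empty, and raises a new exception, so the BadRequestException survives only as the implicit "During handling of the above exception" chain. A minimal sketch of that masking pattern (simplified, with a hypothetical storage probe), showing how explicit raise ... from e chaining would keep the real cause prominent:

    class BadRequestException(Exception):
        pass

    class DatasetHandlerError(Exception):
        pass

    def storage_is_empty(path: str) -> bool:
        # Hypothetical stand-in; the real code checks len(base_storage) == 0.
        return True

    def delete(path: str, force: bool = False) -> None:
        try:
            # Stand-in for ds.delete() failing server-side with the 400.
            raise BadRequestException(
                "object generator can't be used in 'await' expression"
            )
        except Exception as e:
            if force and storage_is_empty(path):
                # `from e` marks the 400 as the direct cause instead of
                # leaving it buried in the implicit exception context.
                raise DatasetHandlerError(
                    f"Path {path} is empty or does not exist. Cannot delete."
                ) from e
            raise

    # delete("hub://org/ds", force=True)  # DatasetHandlerError, caused by the 400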

Check failure on line 750 in deeplake/api/tests/test_api.py


test_api.test_hub_dataset_suffix_bug

deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
object generator can't be used in 'await' expression
Raw output
hub_cloud_ds = Dataset(path='hub://testingacc2/tmp6189_test_api_test_hub_dataset_suffix_bug', tensors=[])
hub_cloud_dev_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'

    @pytest.mark.slow
    def test_hub_dataset_suffix_bug(hub_cloud_ds, hub_cloud_dev_token):
        # creating dataset with similar name but some suffix removed from end
        ds = deeplake.dataset(hub_cloud_ds.path[:-1], token=hub_cloud_dev_token)
    
        # need to delete because it's a different path (won't be auto cleaned up)
>       ds.delete()

deeplake/api/tests/test_api.py:750: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
deeplake/core/dataset/deeplake_cloud_dataset.py:246: in delete
    self.client.delete_dataset_entry(self.org_id, self.ds_name)
deeplake/client/client.py:306: in delete_dataset_entry
    self.request(
deeplake/client/client.py:148: in request
    check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

response = <Response [400]>

    def check_response_status(response: requests.Response):
        """Check response status and throw corresponding exception on failure."""
        code = response.status_code
        if code >= 200 and code < 300:
            return
    
        try:
            message = response.json()["description"]
        except Exception:
            message = " "
    
        if code == 400:
>           raise BadRequestException(message)
E           deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
E           object generator can't be used in 'await' expression

deeplake/client/utils.py:56: BadRequestException

Check failure on line 976 in deeplake/api/tests/test_api.py


test_api.test_dataset_rename[True-hub_cloud_ds_generator-hub_cloud_path-hub_cloud_dev_token]

deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
object generator can't be used in 'await' expression
Raw output
ds_generator = <function hub_cloud_ds_generator.<locals>.generate_hub_cloud_ds at 0x7fd8aaedc9a0>
path = 'hub://testingacc2/tmp6189_test_api_test_dataset_rename-True-hub_cloud_ds_generator-hub_cloud_path-hub_cloud_dev_token-'
hub_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
convert_to_pathlib = True

    @pytest.mark.parametrize(
        ("ds_generator", "path", "hub_token"),
        [
            ("local_ds_generator", "local_path", "hub_cloud_dev_token"),
            pytest.param(
                "s3_ds_generator", "s3_path", "hub_cloud_dev_token", marks=pytest.mark.slow
            ),
            pytest.param(
                "gcs_ds_generator",
                "gcs_path",
                "hub_cloud_dev_token",
                marks=pytest.mark.slow,
            ),
            pytest.param(
                "azure_ds_generator",
                "azure_path",
                "hub_cloud_dev_token",
                marks=pytest.mark.slow,
            ),
            pytest.param(
                "hub_cloud_ds_generator",
                "hub_cloud_path",
                "hub_cloud_dev_token",
                marks=pytest.mark.slow,
            ),
        ],
        indirect=True,
    )
    @pytest.mark.parametrize("convert_to_pathlib", [True, False])
    def test_dataset_rename(ds_generator, path, hub_token, convert_to_pathlib):
        ds = ds_generator()
        ds.create_tensor("abc")
        ds.abc.append([1, 2, 3, 4])
    
        new_path = "_".join([path, "renamed"])
    
        ds.path = convert_string_to_pathlib_if_needed(ds.path, convert_to_pathlib)
        new_path = convert_string_to_pathlib_if_needed(new_path, convert_to_pathlib)
    
        with pytest.raises(RenameError):
            ds.rename("wrongfolder/new_ds")
    
        if str(ds.path).startswith("hub://"):
            with pytest.raises(BadRequestException):
                ds.rename(ds.path)
        else:
            with pytest.raises(PathNotEmptyException):
                ds.rename(ds.path)
    
>       ds = deeplake.rename(ds.path, new_path, token=hub_token)

deeplake/api/tests/test_api.py:976: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
deeplake/api/dataset.py:842: in rename
    deeplake.delete(old_path, token=token, creds=creds)
deeplake/util/spinner.py:151: in inner
    return func(*args, **kwargs)
deeplake/api/dataset.py:905: in delete
    ds.delete(large_ok=large_ok)
deeplake/core/dataset/deeplake_cloud_dataset.py:246: in delete
    self.client.delete_dataset_entry(self.org_id, self.ds_name)
deeplake/client/client.py:306: in delete_dataset_entry
    self.request(
deeplake/client/client.py:148: in request
    check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

response = <Response [400]>

    def check_response_status(response: requests.Response):
        """Check response status and throw corresponding exception on failure."""
        code = response.status_code
        if code >= 200 and code < 300:
            return
    
        try:
            message = response.json()["description"]
        except Exception:
            message = " "
    
        if code == 400:
>           raise BadRequestException(message)
E           deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
E           object generator can't be used in 'await' expression

deeplake/client/utils.py:56: BadRequestException

Check failure on line 976 in deeplake/api/tests/test_api.py


test_api.test_dataset_rename[False-hub_cloud_ds_generator-hub_cloud_path-hub_cloud_dev_token]

deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
object generator can't be used in 'await' expression
Raw output
ds_generator = <function hub_cloud_ds_generator.<locals>.generate_hub_cloud_ds at 0x7fd7bbae5d00>
path = 'hub://testingacc2/tmp6189_test_api_test_dataset_rename-False-hub_cloud_ds_generator-hub_cloud_path-hub_cloud_dev_token-'
hub_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
convert_to_pathlib = False

    @pytest.mark.parametrize(
        ("ds_generator", "path", "hub_token"),
        [
            ("local_ds_generator", "local_path", "hub_cloud_dev_token"),
            pytest.param(
                "s3_ds_generator", "s3_path", "hub_cloud_dev_token", marks=pytest.mark.slow
            ),
            pytest.param(
                "gcs_ds_generator",
                "gcs_path",
                "hub_cloud_dev_token",
                marks=pytest.mark.slow,
            ),
            pytest.param(
                "azure_ds_generator",
                "azure_path",
                "hub_cloud_dev_token",
                marks=pytest.mark.slow,
            ),
            pytest.param(
                "hub_cloud_ds_generator",
                "hub_cloud_path",
                "hub_cloud_dev_token",
                marks=pytest.mark.slow,
            ),
        ],
        indirect=True,
    )
    @pytest.mark.parametrize("convert_to_pathlib", [True, False])
    def test_dataset_rename(ds_generator, path, hub_token, convert_to_pathlib):
        ds = ds_generator()
        ds.create_tensor("abc")
        ds.abc.append([1, 2, 3, 4])
    
        new_path = "_".join([path, "renamed"])
    
        ds.path = convert_string_to_pathlib_if_needed(ds.path, convert_to_pathlib)
        new_path = convert_string_to_pathlib_if_needed(new_path, convert_to_pathlib)
    
        with pytest.raises(RenameError):
            ds.rename("wrongfolder/new_ds")
    
        if str(ds.path).startswith("hub://"):
            with pytest.raises(BadRequestException):
                ds.rename(ds.path)
        else:
            with pytest.raises(PathNotEmptyException):
                ds.rename(ds.path)
    
>       ds = deeplake.rename(ds.path, new_path, token=hub_token)

deeplake/api/tests/test_api.py:976: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
deeplake/api/dataset.py:842: in rename
    deeplake.delete(old_path, token=token, creds=creds)
deeplake/util/spinner.py:151: in inner
    return func(*args, **kwargs)
deeplake/api/dataset.py:905: in delete
    ds.delete(large_ok=large_ok)
deeplake/core/dataset/deeplake_cloud_dataset.py:246: in delete
    self.client.delete_dataset_entry(self.org_id, self.ds_name)
deeplake/client/client.py:306: in delete_dataset_entry
    self.request(
deeplake/client/client.py:148: in request
    check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

response = <Response [400]>

    def check_response_status(response: requests.Response):
        """Check response status and throw corresponding exception on failure."""
        code = response.status_code
        if code >= 200 and code < 300:
            return
    
        try:
            message = response.json()["description"]
        except Exception:
            message = " "
    
        if code == 400:
>           raise BadRequestException(message)
E           deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
E           object generator can't be used in 'await' expression

deeplake/client/utils.py:56: BadRequestException

Check failure on line 1036 in deeplake/api/tests/test_api.py


test_api.test_dataset_deepcopy[True-2-hub_cloud_path-hub_cloud_dev_token]

deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
object generator can't be used in 'await' expression
Raw output
path = 'hub://testingacc2/tmp6189_test_api_test_dataset_deepcopy-True-2-hub_cloud_path-hub_cloud_dev_token-'
hub_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
num_workers = 2, progressbar = True

    @pytest.mark.parametrize(
        "path,hub_token",
        [
            ["local_path", "hub_cloud_dev_token"],
            pytest.param("hub_cloud_path", "hub_cloud_dev_token", marks=pytest.mark.slow),
        ],
        indirect=True,
    )
    @pytest.mark.parametrize("num_workers", [2])
    @pytest.mark.parametrize("progressbar", [True])
    def test_dataset_deepcopy(path, hub_token, num_workers, progressbar):
        src_path = "_".join((path, "src1"))
        dest_path = "_".join((path, "dest1"))
    
        src_ds = deeplake.empty(src_path, overwrite=True, token=hub_token)
    
        with src_ds:
            src_ds.info.update(key=0)
    
            src_ds.create_tensor("a", htype="image", sample_compression="png")
            src_ds.create_tensor("b", htype="class_label")
            src_ds.create_tensor("c")
            src_ds.create_tensor("d", dtype=bool)
    
            src_ds.d.info.update(key=1)
    
            src_ds["a"].append(np.ones((28, 28), dtype="uint8"))
            src_ds["b"].append(0)
    
        dest_ds = deeplake.deepcopy(
            src_path,
            dest_path,
            token=hub_token,
            num_workers=num_workers,
            progressbar=progressbar,
        )
    
        assert list(dest_ds.tensors) == ["a", "b", "c", "d"]
        assert dest_ds.a.meta.htype == "image"
        assert dest_ds.a.meta.sample_compression == "png"
        assert dest_ds.b.meta.htype == "class_label"
        assert dest_ds.c.meta.htype == None
        assert dest_ds.d.dtype == bool
    
        assert dest_ds.info.key == 0
        assert dest_ds.d.info.key == 1
    
        for tensor in dest_ds.meta.tensors:
            assert_array_equal(src_ds[tensor].numpy(), dest_ds[tensor].numpy())
    
>       deeplake.delete(src_path, token=hub_token)

deeplake/api/tests/test_api.py:1036: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
deeplake/util/spinner.py:151: in inner
    return func(*args, **kwargs)
deeplake/api/dataset.py:905: in delete
    ds.delete(large_ok=large_ok)
deeplake/core/dataset/deeplake_cloud_dataset.py:246: in delete
    self.client.delete_dataset_entry(self.org_id, self.ds_name)
deeplake/client/client.py:306: in delete_dataset_entry
    self.request(
deeplake/client/client.py:148: in request
    check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

response = <Response [400]>

    def check_response_status(response: requests.Response):
        """Check response status and throw corresponding exception on failure."""
        code = response.status_code
        if code >= 200 and code < 300:
            return
    
        try:
            message = response.json()["description"]
        except Exception:
            message = " "
    
        if code == 400:
>           raise BadRequestException(message)
E           deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
E           object generator can't be used in 'await' expression

deeplake/client/utils.py:56: BadRequestException

Check failure on line 52 in deeplake/api/tests/test_views.py


test_views.test_view_token_only

deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
object generator can't be used in 'await' expression
Raw output
hub_cloud_path = 'hub://testingacc2/tmp6189_test_views_test_view_token_only'
hub_cloud_dev_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
hub_cloud_dev_credentials = ('testingacc2', None)

    @pytest.mark.slow
    def test_view_token_only(
        hub_cloud_path, hub_cloud_dev_token, hub_cloud_dev_credentials
    ):
        ds = deeplake.empty(hub_cloud_path, token=hub_cloud_dev_token)
        with ds:
            populate(ds)
    
        ds = deeplake.load(hub_cloud_path, token=hub_cloud_dev_token)
        view = ds[50:100]
        view.save_view(id="50to100")
    
        ds = deeplake.load(hub_cloud_path, read_only=True, token=hub_cloud_dev_token)
        view = ds[25:100]
        view.save_view(id="25to100")
    
        ds = deeplake.load(hub_cloud_path, read_only=True, token=hub_cloud_dev_token)
    
        loaded = ds.load_view("50to100")
        np.testing.assert_array_equal(loaded.images.numpy(), ds[50:100].images.numpy())
        np.testing.assert_array_equal(loaded.labels.numpy(), ds[50:100].labels.numpy())
        assert loaded._vds.path == posixpath.join(hub_cloud_path, ".queries/50to100")
    
        loaded = ds.load_view("25to100")
        np.testing.assert_array_equal(loaded.images.numpy(), ds[25:100].images.numpy())
        np.testing.assert_array_equal(loaded.labels.numpy(), ds[25:100].labels.numpy())
        assert loaded._vds.path == posixpath.join(hub_cloud_path, ".queries/25to100")
    
        ds.delete_view("25to100")
>       deeplake.delete(hub_cloud_path, token=hub_cloud_dev_token)

deeplake/api/tests/test_views.py:52: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
deeplake/util/spinner.py:151: in inner
    return func(*args, **kwargs)
deeplake/api/dataset.py:905: in delete
    ds.delete(large_ok=large_ok)
deeplake/core/dataset/deeplake_cloud_dataset.py:246: in delete
    self.client.delete_dataset_entry(self.org_id, self.ds_name)
deeplake/client/client.py:306: in delete_dataset_entry
    self.request(
deeplake/client/client.py:148: in request
    check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

response = <Response [400]>

    def check_response_status(response: requests.Response):
        """Check response status and throw corresponding exception on failure."""
        code = response.status_code
        if code >= 200 and code < 300:
            return
    
        try:
            message = response.json()["description"]
        except Exception:
            message = " "
    
        if code == 400:
>           raise BadRequestException(message)
E           deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
E           object generator can't be used in 'await' expression

deeplake/client/utils.py:56: BadRequestException

Check failure on line 1063 in deeplake/core/vectorstore/test_deeplake_vectorstore.py


test_deeplake_vectorstore.test_update_embedding[embedding_fn3-hub_cloud_ds-None-None-None-None-vector_store_query-hub_cloud_dev_token]

deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
object generator can't be used in 'await' expression
Raw output
ds = Dataset(path='hub://testingacc2/tmp6189_test_deeplake_vectorstore_test_update_embedding-embedding_fn3-hub_cloud_ds-None-None-None-None-vector_store_query-hub_cloud_dev_token-', tensors=[])
vector_store_hash_ids = None, vector_store_row_ids = None
vector_store_filters = None, vector_store_filter_udf = None
vector_store_query = "select * where metadata['a']==1"
init_embedding_function = <function embedding_fn3 at 0x7fd8709c8860>
hub_cloud_dev_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'

    @requires_libdeeplake
    @pytest.mark.parametrize(
        "ds, vector_store_hash_ids, vector_store_row_ids, vector_store_filters, vector_store_filter_udf, vector_store_query, hub_cloud_dev_token",
        [
            (
                "local_auth_ds",
                "vector_store_hash_ids",
                None,
                None,
                None,
                None,
                "hub_cloud_dev_token",
            ),
            (
                "local_auth_ds",
                None,
                "vector_store_row_ids",
                None,
                None,
                None,
                "hub_cloud_dev_token",
            ),
            (
                "local_auth_ds",
                None,
                None,
                None,
                "vector_store_filter_udf",
                None,
                "hub_cloud_dev_token",
            ),
            (
                "local_auth_ds",
                None,
                None,
                "vector_store_filters",
                None,
                None,
                "hub_cloud_dev_token",
            ),
            (
                "hub_cloud_ds",
                None,
                None,
                None,
                None,
                "vector_store_query",
                "hub_cloud_dev_token",
            ),
        ],
        indirect=True,
    )
    @pytest.mark.parametrize("init_embedding_function", [embedding_fn3, None])
    @pytest.mark.slow
    @requires_libdeeplake
    def test_update_embedding(
        ds,
        vector_store_hash_ids,
        vector_store_row_ids,
        vector_store_filters,
        vector_store_filter_udf,
        vector_store_query,
        init_embedding_function,
        hub_cloud_dev_token,
    ):
        vector_store_filters = vector_store_filters or vector_store_filter_udf
    
        exec_option = "compute_engine"
        if vector_store_filter_udf:
            exec_option = "python"
    
        embedding_tensor = "embedding"
        embedding_source_tensor = "text"
        # dataset has a single embedding_tensor:
    
        path = ds.path
        vector_store = DeepLakeVectorStore(
            path=path,
            overwrite=True,
            verbose=False,
            exec_option=exec_option,
            embedding_function=init_embedding_function,
            index_params={"threshold": 10},
            token=hub_cloud_dev_token,
        )
    
        # add data to the dataset:
        metadatas[1:6] = [{"a": 1} for _ in range(5)]
        vector_store.add(id=ids, embedding=embeddings, text=texts, metadata=metadatas)
    
        # case 1: single embedding_source_tensor, single embedding_tensor, single embedding_function
        new_embedding_value = 100
        embedding_fn = get_embedding_function(embedding_value=new_embedding_value)
        vector_store.update_embedding(
            ids=vector_store_hash_ids,
            row_ids=vector_store_row_ids,
            filter=vector_store_filters,
            query=vector_store_query,
            embedding_function=embedding_fn,
            embedding_source_tensor=embedding_source_tensor,
            embedding_tensor=embedding_tensor,
        )
        assert_updated_vector_store(
            new_embedding_value,
            vector_store,
            vector_store_hash_ids,
            vector_store_row_ids,
            vector_store_filters,
            vector_store_query,
            embedding_fn,
            embedding_source_tensor,
            embedding_tensor,
            exec_option,
            num_changed_samples=5,
        )
    
        # case 2: single embedding_source_tensor, single embedding_tensor not specified, single embedding_function
        new_embedding_value = 100
        embedding_fn = get_embedding_function(embedding_value=new_embedding_value)
        vector_store.update_embedding(
            ids=vector_store_hash_ids,
            row_ids=vector_store_row_ids,
            filter=vector_store_filters,
            query=vector_store_query,
            embedding_function=embedding_fn,
            embedding_source_tensor=embedding_source_tensor,
        )
        assert_updated_vector_store(
            new_embedding_value,
            vector_store,
            vector_store_hash_ids,
            vector_store_row_ids,
            vector_store_filters,
            vector_store_query,
            embedding_fn,
            embedding_source_tensor,
            embedding_tensor,
            exec_option,
            num_changed_samples=5,
        )
    
        # case 3-4: single embedding_source_tensor, single embedding_tensor, single init_embedding_function
        if init_embedding_function is None:
            # case 3: errors out when init_embedding_function is not specified
            with pytest.raises(ValueError):
                vector_store.update_embedding(
                    ids=vector_store_hash_ids,
                    row_ids=vector_store_row_ids,
                    filter=vector_store_filters,
                    query=vector_store_query,
                    embedding_source_tensor=embedding_source_tensor,
                )
        else:
            # case 4
            vector_store.update_embedding(
                ids=vector_store_hash_ids,
                row_ids=vector_store_row_ids,
                filter=vector_store_filters,
                query=vector_store_query,
                embedding_source_tensor=embedding_source_tensor,
            )
            assert_updated_vector_store(
                0,
                vector_store,
                vector_store_hash_ids,
                vector_store_row_ids,
                vector_store_filters,
                vector_store_query,
                init_embedding_function,
                embedding_source_tensor,
                embedding_tensor,
                exec_option,
                num_changed_samples=5,
            )
    
>       vector_store.delete_by_path(path, token=ds.token)

deeplake/core/vectorstore/test_deeplake_vectorstore.py:1063: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
deeplake/core/vectorstore/deeplake_vectorstore.py:490: in delete_by_path
    deeplake.delete(path, large_ok=True, token=token, force=force, creds=creds)
deeplake/util/spinner.py:151: in inner
    return func(*args, **kwargs)
deeplake/api/dataset.py:905: in delete
    ds.delete(large_ok=large_ok)
deeplake/core/dataset/deeplake_cloud_dataset.py:246: in delete
    self.client.delete_dataset_entry(self.org_id, self.ds_name)
deeplake/client/client.py:306: in delete_dataset_entry
    self.request(
deeplake/client/client.py:148: in request
    check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

response = <Response [400]>

    def check_response_status(response: requests.Response):
        """Check response status and throw corresponding exception on failure."""
        code = response.status_code
        if code >= 200 and code < 300:
            return
    
        try:
            message = response.json()["description"]
        except Exception:
            message = " "
    
        if code == 400:
>           raise BadRequestException(message)
E           deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
E           object generator can't be used in 'await' expression

deeplake/client/utils.py:56: BadRequestException

Check failure on line 1063 in deeplake/core/vectorstore/test_deeplake_vectorstore.py


test_deeplake_vectorstore.test_update_embedding[None-hub_cloud_ds-None-None-None-None-vector_store_query-hub_cloud_dev_token]

deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
object generator can't be used in 'await' expression
Raw output
ds = Dataset(path='hub://testingacc2/tmp6189_test_deeplake_vectorstore_test_update_embedding-None-hub_cloud_ds-None-None-None-None-vector_store_query-hub_cloud_dev_token-', tensors=[])
vector_store_hash_ids = None, vector_store_row_ids = None
vector_store_filters = None, vector_store_filter_udf = None
vector_store_query = "select * where metadata['a']==1"
init_embedding_function = None
hub_cloud_dev_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'

    @requires_libdeeplake
    @pytest.mark.parametrize(
        "ds, vector_store_hash_ids, vector_store_row_ids, vector_store_filters, vector_store_filter_udf, vector_store_query, hub_cloud_dev_token",
        [
            (
                "local_auth_ds",
                "vector_store_hash_ids",
                None,
                None,
                None,
                None,
                "hub_cloud_dev_token",
            ),
            (
                "local_auth_ds",
                None,
                "vector_store_row_ids",
                None,
                None,
                None,
                "hub_cloud_dev_token",
            ),
            (
                "local_auth_ds",
                None,
                None,
                None,
                "vector_store_filter_udf",
                None,
                "hub_cloud_dev_token",
            ),
            (
                "local_auth_ds",
                None,
                None,
                "vector_store_filters",
                None,
                None,
                "hub_cloud_dev_token",
            ),
            (
                "hub_cloud_ds",
                None,
                None,
                None,
                None,
                "vector_store_query",
                "hub_cloud_dev_token",
            ),
        ],
        indirect=True,
    )
    @pytest.mark.parametrize("init_embedding_function", [embedding_fn3, None])
    @pytest.mark.slow
    @requires_libdeeplake
    def test_update_embedding(
        ds,
        vector_store_hash_ids,
        vector_store_row_ids,
        vector_store_filters,
        vector_store_filter_udf,
        vector_store_query,
        init_embedding_function,
        hub_cloud_dev_token,
    ):
        vector_store_filters = vector_store_filters or vector_store_filter_udf
    
        exec_option = "compute_engine"
        if vector_store_filter_udf:
            exec_option = "python"
    
        embedding_tensor = "embedding"
        embedding_source_tensor = "text"
        # dataset has a single embedding_tensor:
    
        path = ds.path
        vector_store = DeepLakeVectorStore(
            path=path,
            overwrite=True,
            verbose=False,
            exec_option=exec_option,
            embedding_function=init_embedding_function,
            index_params={"threshold": 10},
            token=hub_cloud_dev_token,
        )
    
        # add data to the dataset:
        metadatas[1:6] = [{"a": 1} for _ in range(5)]
        vector_store.add(id=ids, embedding=embeddings, text=texts, metadata=metadatas)
    
        # case 1: single embedding_source_tensor, single embedding_tensor, single embedding_function
        new_embedding_value = 100
        embedding_fn = get_embedding_function(embedding_value=new_embedding_value)
        vector_store.update_embedding(
            ids=vector_store_hash_ids,
            row_ids=vector_store_row_ids,
            filter=vector_store_filters,
            query=vector_store_query,
            embedding_function=embedding_fn,
            embedding_source_tensor=embedding_source_tensor,
            embedding_tensor=embedding_tensor,
        )
        assert_updated_vector_store(
            new_embedding_value,
            vector_store,
            vector_store_hash_ids,
            vector_store_row_ids,
            vector_store_filters,
            vector_store_query,
            embedding_fn,
            embedding_source_tensor,
            embedding_tensor,
            exec_option,
            num_changed_samples=5,
        )
    
        # case 2: single embedding_source_tensor, single embedding_tensor not specified, single embedding_function
        new_embedding_value = 100
        embedding_fn = get_embedding_function(embedding_value=new_embedding_value)
        vector_store.update_embedding(
            ids=vector_store_hash_ids,
            row_ids=vector_store_row_ids,
            filter=vector_store_filters,
            query=vector_store_query,
            embedding_function=embedding_fn,
            embedding_source_tensor=embedding_source_tensor,
        )
        assert_updated_vector_store(
            new_embedding_value,
            vector_store,
            vector_store_hash_ids,
            vector_store_row_ids,
            vector_store_filters,
            vector_store_query,
            embedding_fn,
            embedding_source_tensor,
            embedding_tensor,
            exec_option,
            num_changed_samples=5,
        )
    
        # case 3-4: single embedding_source_tensor, single embedding_tensor, single init_embedding_function
        if init_embedding_function is None:
            # case 3: errors out when init_embedding_function is not specified
            with pytest.raises(ValueError):
                vector_store.update_embedding(
                    ids=vector_store_hash_ids,
                    row_ids=vector_store_row_ids,
                    filter=vector_store_filters,
                    query=vector_store_query,
                    embedding_source_tensor=embedding_source_tensor,
                )
        else:
            # case 4
            vector_store.update_embedding(
                ids=vector_store_hash_ids,
                row_ids=vector_store_row_ids,
                filter=vector_store_filters,
                query=vector_store_query,
                embedding_source_tensor=embedding_source_tensor,
            )
            assert_updated_vector_store(
                0,
                vector_store,
                vector_store_hash_ids,
                vector_store_row_ids,
                vector_store_filters,
                vector_store_query,
                init_embedding_function,
                embedding_source_tensor,
                embedding_tensor,
                exec_option,
                num_changed_samples=5,
            )
    
>       vector_store.delete_by_path(path, token=ds.token)

deeplake/core/vectorstore/test_deeplake_vectorstore.py:1063: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
deeplake/core/vectorstore/deeplake_vectorstore.py:490: in delete_by_path
    deeplake.delete(path, large_ok=True, token=token, force=force, creds=creds)
deeplake/util/spinner.py:151: in inner
    return func(*args, **kwargs)
deeplake/api/dataset.py:905: in delete
    ds.delete(large_ok=large_ok)
deeplake/core/dataset/deeplake_cloud_dataset.py:246: in delete
    self.client.delete_dataset_entry(self.org_id, self.ds_name)
deeplake/client/client.py:306: in delete_dataset_entry
    self.request(
deeplake/client/client.py:148: in request
    check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

response = <Response [400]>

    def check_response_status(response: requests.Response):
        """Check response status and throw corresponding exception on failure."""
        code = response.status_code
        if code >= 200 and code < 300:
            return
    
        try:
            message = response.json()["description"]
        except Exception:
            message = " "
    
        if code == 400:
>           raise BadRequestException(message)
E           deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
E           object generator can't be used in 'await' expression

deeplake/client/utils.py:56: BadRequestException