Improved message for GetChunkError #12251

GitHub Actions / JUnit Test Report failed May 7, 2024 in 0s

2998 tests run, 1668 passed, 1319 skipped, 11 failed.

Annotations

Check failure on line 1 in deeplake/core/vectorstore/deep_memory/test_deepmemory.py

test_deepmemory.test_deepmemory_train_and_cancel

failed on setup with "KeyError: NoSuchKey('An error occurred (NoSuchKey) when calling the GetObject operation: The specified key does not exist.')"
Raw output
self = <deeplake.core.storage.s3.S3Provider object at 0x7f3e3accfd30>
path = 'vectordb/testingacc2/tmp65bb_test_deepmemory_test_deepmemory_train_and_cancel/dataset_meta.json'
start_byte = None, end_byte = None

    def get_bytes(
        self,
        path: str,
        start_byte: Optional[int] = None,
        end_byte: Optional[int] = None,
    ):
        """Gets the object present at the path within the given byte range.
    
        Args:
            path (str): The path relative to the root of the provider.
            start_byte (int, optional): If only specific bytes starting from ``start_byte`` are required.
            end_byte (int, optional): If only specific bytes up to end_byte are required.
    
        Returns:
            bytes: The bytes of the object present at the path within the given byte range.
    
        Raises:
            InvalidBytesRequestedError: If ``start_byte`` > ``end_byte`` or ``start_byte`` < 0 or ``end_byte`` < 0.
            KeyError: If an object is not found at the path.
            S3GetAccessError: Invalid credentials for the object path storage.
            S3GetError: Any other error while retrieving the object.
        """
        self._check_update_creds()
        path = "".join((self.path, path))
        try:
>           return self._get_bytes(path, start_byte, end_byte)

deeplake/core/storage/s3.py:275: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
deeplake/core/storage/s3.py:247: in _get_bytes
    resp = self.client.get_object(Bucket=self.bucket, Key=path, Range=range)
/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/botocore/client.py:553: in _api_call
    return self._make_api_call(operation_name, kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <botocore.client.S3 object at 0x7f3d48e762f0>
operation_name = 'GetObject'
api_params = {'Bucket': 'activeloopai-db-engine-staging', 'Key': 'vectordb/testingacc2/tmp65bb_test_deepmemory_test_deepmemory_train_and_cancel/dataset_meta.json', 'Range': ''}

    def _make_api_call(self, operation_name, api_params):
        operation_model = self._service_model.operation_model(operation_name)
        service_name = self._service_model.service_name
        history_recorder.record(
            'API_CALL',
            {
                'service': service_name,
                'operation': operation_name,
                'params': api_params,
            },
        )
        if operation_model.deprecated:
            logger.debug(
                'Warning: %s.%s() is deprecated', service_name, operation_name
            )
        request_context = {
            'client_region': self.meta.region_name,
            'client_config': self.meta.config,
            'has_streaming_input': operation_model.has_streaming_input,
            'auth_type': operation_model.auth_type,
        }
        api_params = self._emit_api_params(
            api_params=api_params,
            operation_model=operation_model,
            context=request_context,
        )
        (
            endpoint_url,
            additional_headers,
            properties,
        ) = self._resolve_endpoint_ruleset(
            operation_model, api_params, request_context
        )
        if properties:
            # Pass arbitrary endpoint info with the Request
            # for use during construction.
            request_context['endpoint_properties'] = properties
        request_dict = self._convert_to_request_dict(
            api_params=api_params,
            operation_model=operation_model,
            endpoint_url=endpoint_url,
            context=request_context,
            headers=additional_headers,
        )
        resolve_checksum_context(request_dict, operation_model, api_params)
    
        service_id = self._service_model.service_id.hyphenize()
        handler, event_response = self.meta.events.emit_until_response(
            'before-call.{service_id}.{operation_name}'.format(
                service_id=service_id, operation_name=operation_name
            ),
            model=operation_model,
            params=request_dict,
            request_signer=self._request_signer,
            context=request_context,
        )
    
        if event_response is not None:
            http, parsed_response = event_response
        else:
            maybe_compress_request(
                self.meta.config, request_dict, operation_model
            )
            apply_request_checksum(request_dict)
            http, parsed_response = self._make_request(
                operation_model, request_dict, request_context
            )
    
        self.meta.events.emit(
            'after-call.{service_id}.{operation_name}'.format(
                service_id=service_id, operation_name=operation_name
            ),
            http_response=http,
            parsed=parsed_response,
            model=operation_model,
            context=request_context,
        )
    
        if http.status_code >= 300:
            error_info = parsed_response.get("Error", {})
            error_code = error_info.get("QueryErrorCode") or error_info.get(
                "Code"
            )
            error_class = self.exceptions.from_code(error_code)
>           raise error_class(parsed_response, operation_name)
E           botocore.errorfactory.NoSuchKey: An error occurred (NoSuchKey) when calling the GetObject operation: The specified key does not exist.

/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/botocore/client.py:1009: NoSuchKey

The above exception was the direct cause of the following exception:

request = <SubRequest 'corpus_query_relevances_copy' for <Function test_deepmemory_train_and_cancel>>
hub_cloud_dev_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'

    @pytest.fixture
    def corpus_query_relevances_copy(request, hub_cloud_dev_token):
        if not is_opt_true(request, HUB_CLOUD_OPT):
            pytest.skip(f"{HUB_CLOUD_OPT} flag not set")
            return
    
        corpus = _get_storage_path(request, HUB_CLOUD)
        query_vs = VectorStore(
            path=f"hub://{HUB_CLOUD_DEV_USERNAME}/deepmemory_test_queries",
            runtime={"tensor_db": True},
            token=hub_cloud_dev_token,
        )
        queries = query_vs.dataset.text.data()["value"]
        relevance = query_vs.dataset.metadata.data()["value"]
        relevance = [rel["relevance"] for rel in relevance]
    
>       deeplake.deepcopy(
            f"hub://{HUB_CLOUD_DEV_USERNAME}/test-deepmemory10",
            corpus,
            token=hub_cloud_dev_token,
            overwrite=True,
            runtime={"tensor_db": True},
        )

deeplake/tests/path_fixtures.py:491: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
deeplake/api/dataset.py:1314: in deepcopy
    if not dataset._allow_delete(cache_chain):
deeplake/api/dataset.py:2130: in _allow_delete
    storage[get_dataset_meta_key(commit_id or FIRST_COMMIT_ID)].decode("utf-8")
deeplake/core/storage/lru_cache.py:217: in __getitem__
    result = self.next_storage[path]
deeplake/core/storage/s3.py:232: in __getitem__
    return self.get_bytes(path)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <deeplake.core.storage.s3.S3Provider object at 0x7f3e3accfd30>
path = 'vectordb/testingacc2/tmp65bb_test_deepmemory_test_deepmemory_train_and_cancel/dataset_meta.json'
start_byte = None, end_byte = None

    def get_bytes(
        self,
        path: str,
        start_byte: Optional[int] = None,
        end_byte: Optional[int] = None,
    ):
        """Gets the object present at the path within the given byte range.
    
        Args:
            path (str): The path relative to the root of the provider.
            start_byte (int, optional): If only specific bytes starting from ``start_byte`` are required.
            end_byte (int, optional): If only specific bytes up to end_byte are required.
    
        Returns:
            bytes: The bytes of the object present at the path within the given byte range.
    
        Raises:
            InvalidBytesRequestedError: If ``start_byte`` > ``end_byte`` or ``start_byte`` < 0 or ``end_byte`` < 0.
            KeyError: If an object is not found at the path.
            S3GetAccessError: Invalid credentials for the object path storage.
            S3GetError: Any other error while retrieving the object.
        """
        self._check_update_creds()
        path = "".join((self.path, path))
        try:
            return self._get_bytes(path, start_byte, end_byte)
        except botocore.exceptions.ClientError as err:
            if err.response["Error"]["Code"] == "NoSuchKey":
>               raise KeyError(err) from err
E               KeyError: NoSuchKey('An error occurred (NoSuchKey) when calling the GetObject operation: The specified key does not exist.')

deeplake/core/storage/s3.py:278: KeyError
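
Note: the KeyError in this fixture failure comes from the wrapping visible in the traceback, where S3Provider.get_bytes catches botocore's ClientError and re-raises it as KeyError. Below is a minimal, hedged sketch of that pattern; it builds the botocore error locally instead of calling S3, and the helper name get_bytes_like is hypothetical.

import botocore.exceptions

def get_bytes_like() -> bytes:
    # Mirrors the except-clause of S3Provider.get_bytes shown above.
    try:
        # Simulate what client.get_object raises for a missing key.
        raise botocore.exceptions.ClientError(
            {"Error": {"Code": "NoSuchKey", "Message": "The specified key does not exist."}},
            "GetObject",
        )
    except botocore.exceptions.ClientError as err:
        if err.response["Error"]["Code"] == "NoSuchKey":
            raise KeyError(err) from err
        raise

try:
    get_bytes_like()
except KeyError as e:
    print(e)  # the KeyError carries the original ClientError, as in the failure above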

Check failure on line 948 in deeplake/enterprise/test_pytorch.py

test_pytorch.test_pytorch_data_decode

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xd1 in position 2: invalid continuation byte
Raw output
local_auth_ds = Dataset(path='./hub_pytest/test_pytorch/test_pytorch_data_decode', tensors=['generic', 'text', 'json', 'list', 'class_label', 'image'])
cat_path = '/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/cat.jpeg'

    @requires_libdeeplake
    @requires_torch
    @pytest.mark.flaky
    @pytest.mark.slow
    def test_pytorch_data_decode(local_auth_ds, cat_path):
        with local_auth_ds as ds:
            ds.create_tensor("generic")
            for i in range(10):
                ds.generic.append(i)
            ds.create_tensor("text", htype="text")
            for i in range(10):
                ds.text.append(f"hello {i}")
            ds.create_tensor("json", htype="json")
            for i in range(10):
                ds.json.append({"x": i})
            ds.create_tensor("list", htype="list")
            for i in range(10):
                ds.list.append([i, i + 1])
            ds.create_tensor("class_label", htype="class_label")
            animals = [
                "cat",
                "dog",
                "bird",
                "fish",
                "horse",
                "cow",
                "pig",
                "sheep",
                "goat",
                "chicken",
            ]
            ds.class_label.extend(animals)
            ds.create_tensor("image", htype="image", sample_compression="jpeg")
            for i in range(10):
                ds.image.append(deeplake.read(cat_path))
    
        decode_method = {tensor: "data" for tensor in list(ds.tensors.keys())}
        ptds = (
            ds.dataloader()
            .transform(identity)
            .pytorch(decode_method=decode_method, collate_fn=identity_collate)
        )
>       for i, batch in enumerate(ptds):

deeplake/enterprise/test_pytorch.py:948: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
deeplake/enterprise/dataloader.py:881: in __next__
    return next(self._iterator)
/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/indra/pytorch/loader.py:155: in __next__
    return next(self._iterator)
/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/indra/pytorch/single_process_iterator.py:80: in __next__
    return self.get_data()
/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/indra/pytorch/single_process_iterator.py:117: in get_data
    batch = self._next_data()
/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/indra/pytorch/single_process_iterator.py:102: in _next_data
    sample[tensor] = bytes_to_text(sample[tensor], "json")
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

buffer = b'\x08\x0b\xd1+;\x7f\x00\x00', htype = 'json'

    def bytes_to_text(buffer, htype):
        buffer = bytes(buffer)
        if htype == "json":
            arr = np.empty(1, dtype=object)
>           arr[0] = json.loads(bytes.decode(buffer), cls=HubJsonDecoder)
E           UnicodeDecodeError: 'utf-8' codec can't decode byte 0xd1 in position 2: invalid continuation byte

deeplake/core/serialize.py:481: UnicodeDecodeError
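
Note: this failure is reproducible without the dataloader. The buffer handed to the json branch of bytes_to_text is not valid UTF-8, so the decode fails before json.loads ever runs. A minimal standalone illustration (it does not use deeplake):

import json

buffer = b"\x08\x0b\xd1+;\x7f\x00\x00"  # the buffer shown in the traceback above

try:
    json.loads(buffer.decode("utf-8"))
except UnicodeDecodeError as e:
    print(e)  # 'utf-8' codec can't decode byte 0xd1 in position 2: invalid continuation byte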

Check failure on line 750 in deeplake/api/tests/test_api.py

test_api.test_hub_dataset_suffix_bug

deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
object generator can't be used in 'await' expression
Raw output
hub_cloud_ds = Dataset(path='hub://testingacc2/tmp75b3_test_api_test_hub_dataset_suffix_bug', tensors=[])
hub_cloud_dev_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'

    @pytest.mark.slow
    def test_hub_dataset_suffix_bug(hub_cloud_ds, hub_cloud_dev_token):
        # creating dataset with similar name but some suffix removed from end
        ds = deeplake.dataset(hub_cloud_ds.path[:-1], token=hub_cloud_dev_token)
    
        # need to delete because it's a different path (won't be auto cleaned up)
>       ds.delete()

deeplake/api/tests/test_api.py:750: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
deeplake/core/dataset/deeplake_cloud_dataset.py:246: in delete
    self.client.delete_dataset_entry(self.org_id, self.ds_name)
deeplake/client/client.py:306: in delete_dataset_entry
    self.request(
deeplake/client/client.py:148: in request
    check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

response = <Response [400]>

    def check_response_status(response: requests.Response):
        """Check response status and throw corresponding exception on failure."""
        code = response.status_code
        if code >= 200 and code < 300:
            return
    
        try:
            message = response.json()["description"]
        except Exception:
            message = " "
    
        if code == 400:
>           raise BadRequestException(message)
E           deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
E           object generator can't be used in 'await' expression

deeplake/client/utils.py:56: BadRequestException
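
Note: several of the following failures raise the same BadRequestException through the check_response_status path shown above. A hedged, self-contained sketch of that mapping follows; BadRequestException is stubbed locally so the snippet runs without deeplake installed, and the 400 response is built by hand rather than returned by the backend.

import requests

class BadRequestException(Exception):
    pass

def check_response_status(response: requests.Response) -> None:
    # Same shape as the function in deeplake/client/utils.py:
    # 2xx passes, 400 raises with the backend's "description" field.
    code = response.status_code
    if 200 <= code < 300:
        return
    try:
        message = response.json()["description"]
    except Exception:
        message = " "
    if code == 400:
        raise BadRequestException(message)

resp = requests.Response()
resp.status_code = 400
resp._content = b'{"description": "Invalid Request. One or more request parameters is incorrect."}'

try:
    check_response_status(resp)
except BadRequestException as e:
    print(e)  # Invalid Request. One or more request parameters is incorrect.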

Check failure on line 976 in deeplake/api/tests/test_api.py

test_api.test_dataset_rename[True-hub_cloud_ds_generator-hub_cloud_path-hub_cloud_dev_token]

deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
object generator can't be used in 'await' expression
Raw output
ds_generator = <function hub_cloud_ds_generator.<locals>.generate_hub_cloud_ds at 0x7f041117d440>
path = 'hub://testingacc2/tmp75b3_test_api_test_dataset_rename-True-hub_cloud_ds_generator-hub_cloud_path-hub_cloud_dev_token-'
hub_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
convert_to_pathlib = True

    @pytest.mark.parametrize(
        ("ds_generator", "path", "hub_token"),
        [
            ("local_ds_generator", "local_path", "hub_cloud_dev_token"),
            pytest.param(
                "s3_ds_generator", "s3_path", "hub_cloud_dev_token", marks=pytest.mark.slow
            ),
            pytest.param(
                "gcs_ds_generator",
                "gcs_path",
                "hub_cloud_dev_token",
                marks=pytest.mark.slow,
            ),
            pytest.param(
                "azure_ds_generator",
                "azure_path",
                "hub_cloud_dev_token",
                marks=pytest.mark.slow,
            ),
            pytest.param(
                "hub_cloud_ds_generator",
                "hub_cloud_path",
                "hub_cloud_dev_token",
                marks=pytest.mark.slow,
            ),
        ],
        indirect=True,
    )
    @pytest.mark.parametrize("convert_to_pathlib", [True, False])
    def test_dataset_rename(ds_generator, path, hub_token, convert_to_pathlib):
        ds = ds_generator()
        ds.create_tensor("abc")
        ds.abc.append([1, 2, 3, 4])
    
        new_path = "_".join([path, "renamed"])
    
        ds.path = convert_string_to_pathlib_if_needed(ds.path, convert_to_pathlib)
        new_path = convert_string_to_pathlib_if_needed(new_path, convert_to_pathlib)
    
        with pytest.raises(RenameError):
            ds.rename("wrongfolder/new_ds")
    
        if str(ds.path).startswith("hub://"):
            with pytest.raises(BadRequestException):
                ds.rename(ds.path)
        else:
            with pytest.raises(PathNotEmptyException):
                ds.rename(ds.path)
    
>       ds = deeplake.rename(ds.path, new_path, token=hub_token)

deeplake/api/tests/test_api.py:976: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
deeplake/api/dataset.py:842: in rename
    deeplake.delete(old_path, token=token, creds=creds)
deeplake/util/spinner.py:151: in inner
    return func(*args, **kwargs)
deeplake/api/dataset.py:905: in delete
    ds.delete(large_ok=large_ok)
deeplake/core/dataset/deeplake_cloud_dataset.py:246: in delete
    self.client.delete_dataset_entry(self.org_id, self.ds_name)
deeplake/client/client.py:306: in delete_dataset_entry
    self.request(
deeplake/client/client.py:148: in request
    check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

response = <Response [400]>

    def check_response_status(response: requests.Response):
        """Check response status and throw corresponding exception on failure."""
        code = response.status_code
        if code >= 200 and code < 300:
            return
    
        try:
            message = response.json()["description"]
        except Exception:
            message = " "
    
        if code == 400:
>           raise BadRequestException(message)
E           deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
E           object generator can't be used in 'await' expression

deeplake/client/utils.py:56: BadRequestException

Check failure on line 976 in deeplake/api/tests/test_api.py

test_api.test_dataset_rename[False-hub_cloud_ds_generator-hub_cloud_path-hub_cloud_dev_token]

deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
object generator can't be used in 'await' expression
Raw output
ds_generator = <function hub_cloud_ds_generator.<locals>.generate_hub_cloud_ds at 0x7f031aa72840>
path = 'hub://testingacc2/tmp75b3_test_api_test_dataset_rename-False-hub_cloud_ds_generator-hub_cloud_path-hub_cloud_dev_token-'
hub_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
convert_to_pathlib = False

    @pytest.mark.parametrize(
        ("ds_generator", "path", "hub_token"),
        [
            ("local_ds_generator", "local_path", "hub_cloud_dev_token"),
            pytest.param(
                "s3_ds_generator", "s3_path", "hub_cloud_dev_token", marks=pytest.mark.slow
            ),
            pytest.param(
                "gcs_ds_generator",
                "gcs_path",
                "hub_cloud_dev_token",
                marks=pytest.mark.slow,
            ),
            pytest.param(
                "azure_ds_generator",
                "azure_path",
                "hub_cloud_dev_token",
                marks=pytest.mark.slow,
            ),
            pytest.param(
                "hub_cloud_ds_generator",
                "hub_cloud_path",
                "hub_cloud_dev_token",
                marks=pytest.mark.slow,
            ),
        ],
        indirect=True,
    )
    @pytest.mark.parametrize("convert_to_pathlib", [True, False])
    def test_dataset_rename(ds_generator, path, hub_token, convert_to_pathlib):
        ds = ds_generator()
        ds.create_tensor("abc")
        ds.abc.append([1, 2, 3, 4])
    
        new_path = "_".join([path, "renamed"])
    
        ds.path = convert_string_to_pathlib_if_needed(ds.path, convert_to_pathlib)
        new_path = convert_string_to_pathlib_if_needed(new_path, convert_to_pathlib)
    
        with pytest.raises(RenameError):
            ds.rename("wrongfolder/new_ds")
    
        if str(ds.path).startswith("hub://"):
            with pytest.raises(BadRequestException):
                ds.rename(ds.path)
        else:
            with pytest.raises(PathNotEmptyException):
                ds.rename(ds.path)
    
>       ds = deeplake.rename(ds.path, new_path, token=hub_token)

deeplake/api/tests/test_api.py:976: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
deeplake/api/dataset.py:842: in rename
    deeplake.delete(old_path, token=token, creds=creds)
deeplake/util/spinner.py:151: in inner
    return func(*args, **kwargs)
deeplake/api/dataset.py:905: in delete
    ds.delete(large_ok=large_ok)
deeplake/core/dataset/deeplake_cloud_dataset.py:246: in delete
    self.client.delete_dataset_entry(self.org_id, self.ds_name)
deeplake/client/client.py:306: in delete_dataset_entry
    self.request(
deeplake/client/client.py:148: in request
    check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

response = <Response [400]>

    def check_response_status(response: requests.Response):
        """Check response status and throw corresponding exception on failure."""
        code = response.status_code
        if code >= 200 and code < 300:
            return
    
        try:
            message = response.json()["description"]
        except Exception:
            message = " "
    
        if code == 400:
>           raise BadRequestException(message)
E           deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
E           object generator can't be used in 'await' expression

deeplake/client/utils.py:56: BadRequestException

Check failure on line 1036 in deeplake/api/tests/test_api.py

test_api.test_dataset_deepcopy[True-2-hub_cloud_path-hub_cloud_dev_token]

deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
object generator can't be used in 'await' expression
Raw output
path = 'hub://testingacc2/tmp75b3_test_api_test_dataset_deepcopy-True-2-hub_cloud_path-hub_cloud_dev_token-'
hub_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
num_workers = 2, progressbar = True

    @pytest.mark.parametrize(
        "path,hub_token",
        [
            ["local_path", "hub_cloud_dev_token"],
            pytest.param("hub_cloud_path", "hub_cloud_dev_token", marks=pytest.mark.slow),
        ],
        indirect=True,
    )
    @pytest.mark.parametrize("num_workers", [2])
    @pytest.mark.parametrize("progressbar", [True])
    def test_dataset_deepcopy(path, hub_token, num_workers, progressbar):
        src_path = "_".join((path, "src1"))
        dest_path = "_".join((path, "dest1"))
    
        src_ds = deeplake.empty(src_path, overwrite=True, token=hub_token)
    
        with src_ds:
            src_ds.info.update(key=0)
    
            src_ds.create_tensor("a", htype="image", sample_compression="png")
            src_ds.create_tensor("b", htype="class_label")
            src_ds.create_tensor("c")
            src_ds.create_tensor("d", dtype=bool)
    
            src_ds.d.info.update(key=1)
    
            src_ds["a"].append(np.ones((28, 28), dtype="uint8"))
            src_ds["b"].append(0)
    
        dest_ds = deeplake.deepcopy(
            src_path,
            dest_path,
            token=hub_token,
            num_workers=num_workers,
            progressbar=progressbar,
        )
    
        assert list(dest_ds.tensors) == ["a", "b", "c", "d"]
        assert dest_ds.a.meta.htype == "image"
        assert dest_ds.a.meta.sample_compression == "png"
        assert dest_ds.b.meta.htype == "class_label"
        assert dest_ds.c.meta.htype == None
        assert dest_ds.d.dtype == bool
    
        assert dest_ds.info.key == 0
        assert dest_ds.d.info.key == 1
    
        for tensor in dest_ds.meta.tensors:
            assert_array_equal(src_ds[tensor].numpy(), dest_ds[tensor].numpy())
    
>       deeplake.delete(src_path, token=hub_token)

deeplake/api/tests/test_api.py:1036: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
deeplake/util/spinner.py:151: in inner
    return func(*args, **kwargs)
deeplake/api/dataset.py:905: in delete
    ds.delete(large_ok=large_ok)
deeplake/core/dataset/deeplake_cloud_dataset.py:246: in delete
    self.client.delete_dataset_entry(self.org_id, self.ds_name)
deeplake/client/client.py:306: in delete_dataset_entry
    self.request(
deeplake/client/client.py:148: in request
    check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

response = <Response [400]>

    def check_response_status(response: requests.Response):
        """Check response status and throw corresponding exception on failure."""
        code = response.status_code
        if code >= 200 and code < 300:
            return
    
        try:
            message = response.json()["description"]
        except Exception:
            message = " "
    
        if code == 400:
>           raise BadRequestException(message)
E           deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
E           object generator can't be used in 'await' expression

deeplake/client/utils.py:56: BadRequestException

Check failure on line 52 in deeplake/api/tests/test_views.py

test_views.test_view_token_only

deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
object generator can't be used in 'await' expression
Raw output
hub_cloud_path = 'hub://testingacc2/tmp75b3_test_views_test_view_token_only'
hub_cloud_dev_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
hub_cloud_dev_credentials = ('testingacc2', None)

    @pytest.mark.slow
    def test_view_token_only(
        hub_cloud_path, hub_cloud_dev_token, hub_cloud_dev_credentials
    ):
        ds = deeplake.empty(hub_cloud_path, token=hub_cloud_dev_token)
        with ds:
            populate(ds)
    
        ds = deeplake.load(hub_cloud_path, token=hub_cloud_dev_token)
        view = ds[50:100]
        view.save_view(id="50to100")
    
        ds = deeplake.load(hub_cloud_path, read_only=True, token=hub_cloud_dev_token)
        view = ds[25:100]
        view.save_view(id="25to100")
    
        ds = deeplake.load(hub_cloud_path, read_only=True, token=hub_cloud_dev_token)
    
        loaded = ds.load_view("50to100")
        np.testing.assert_array_equal(loaded.images.numpy(), ds[50:100].images.numpy())
        np.testing.assert_array_equal(loaded.labels.numpy(), ds[50:100].labels.numpy())
        assert loaded._vds.path == posixpath.join(hub_cloud_path, ".queries/50to100")
    
        loaded = ds.load_view("25to100")
        np.testing.assert_array_equal(loaded.images.numpy(), ds[25:100].images.numpy())
        np.testing.assert_array_equal(loaded.labels.numpy(), ds[25:100].labels.numpy())
        assert loaded._vds.path == posixpath.join(hub_cloud_path, ".queries/25to100")
    
        ds.delete_view("25to100")
>       deeplake.delete(hub_cloud_path, token=hub_cloud_dev_token)

deeplake/api/tests/test_views.py:52: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
deeplake/util/spinner.py:151: in inner
    return func(*args, **kwargs)
deeplake/api/dataset.py:905: in delete
    ds.delete(large_ok=large_ok)
deeplake/core/dataset/deeplake_cloud_dataset.py:246: in delete
    self.client.delete_dataset_entry(self.org_id, self.ds_name)
deeplake/client/client.py:306: in delete_dataset_entry
    self.request(
deeplake/client/client.py:148: in request
    check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

response = <Response [400]>

    def check_response_status(response: requests.Response):
        """Check response status and throw corresponding exception on failure."""
        code = response.status_code
        if code >= 200 and code < 300:
            return
    
        try:
            message = response.json()["description"]
        except Exception:
            message = " "
    
        if code == 400:
>           raise BadRequestException(message)
E           deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
E           object generator can't be used in 'await' expression

deeplake/client/utils.py:56: BadRequestException

Check failure on line 1063 in deeplake/core/vectorstore/test_deeplake_vectorstore.py

test_deeplake_vectorstore.test_update_embedding[embedding_fn3-hub_cloud_ds-None-None-None-None-vector_store_query-hub_cloud_dev_token]

deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
object generator can't be used in 'await' expression
Raw output
ds = Dataset(path='hub://testingacc2/tmp75b3_test_deeplake_vectorstore_test_update_embedding-embedding_fn3-hub_cloud_ds-None-None-None-None-vector_store_query-hub_cloud_dev_token-', tensors=[])
vector_store_hash_ids = None, vector_store_row_ids = None
vector_store_filters = None, vector_store_filter_udf = None
vector_store_query = "select * where metadata['a']==1"
init_embedding_function = <function embedding_fn3 at 0x7f03d2a40900>
hub_cloud_dev_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'

    @requires_libdeeplake
    @pytest.mark.parametrize(
        "ds, vector_store_hash_ids, vector_store_row_ids, vector_store_filters, vector_store_filter_udf, vector_store_query, hub_cloud_dev_token",
        [
            (
                "local_auth_ds",
                "vector_store_hash_ids",
                None,
                None,
                None,
                None,
                "hub_cloud_dev_token",
            ),
            (
                "local_auth_ds",
                None,
                "vector_store_row_ids",
                None,
                None,
                None,
                "hub_cloud_dev_token",
            ),
            (
                "local_auth_ds",
                None,
                None,
                None,
                "vector_store_filter_udf",
                None,
                "hub_cloud_dev_token",
            ),
            (
                "local_auth_ds",
                None,
                None,
                "vector_store_filters",
                None,
                None,
                "hub_cloud_dev_token",
            ),
            (
                "hub_cloud_ds",
                None,
                None,
                None,
                None,
                "vector_store_query",
                "hub_cloud_dev_token",
            ),
        ],
        indirect=True,
    )
    @pytest.mark.parametrize("init_embedding_function", [embedding_fn3, None])
    @pytest.mark.slow
    @requires_libdeeplake
    def test_update_embedding(
        ds,
        vector_store_hash_ids,
        vector_store_row_ids,
        vector_store_filters,
        vector_store_filter_udf,
        vector_store_query,
        init_embedding_function,
        hub_cloud_dev_token,
    ):
        vector_store_filters = vector_store_filters or vector_store_filter_udf
    
        exec_option = "compute_engine"
        if vector_store_filter_udf:
            exec_option = "python"
    
        embedding_tensor = "embedding"
        embedding_source_tensor = "text"
        # dataset has a single embedding_tensor:
    
        path = ds.path
        vector_store = DeepLakeVectorStore(
            path=path,
            overwrite=True,
            verbose=False,
            exec_option=exec_option,
            embedding_function=init_embedding_function,
            index_params={"threshold": 10},
            token=hub_cloud_dev_token,
        )
    
        # add data to the dataset:
        metadatas[1:6] = [{"a": 1} for _ in range(5)]
        vector_store.add(id=ids, embedding=embeddings, text=texts, metadata=metadatas)
    
        # case 1: single embedding_source_tensor, single embedding_tensor, single embedding_function
        new_embedding_value = 100
        embedding_fn = get_embedding_function(embedding_value=new_embedding_value)
        vector_store.update_embedding(
            ids=vector_store_hash_ids,
            row_ids=vector_store_row_ids,
            filter=vector_store_filters,
            query=vector_store_query,
            embedding_function=embedding_fn,
            embedding_source_tensor=embedding_source_tensor,
            embedding_tensor=embedding_tensor,
        )
        assert_updated_vector_store(
            new_embedding_value,
            vector_store,
            vector_store_hash_ids,
            vector_store_row_ids,
            vector_store_filters,
            vector_store_query,
            embedding_fn,
            embedding_source_tensor,
            embedding_tensor,
            exec_option,
            num_changed_samples=5,
        )
    
        # case 2: single embedding_source_tensor, single embedding_tensor not specified, single embedding_function
        new_embedding_value = 100
        embedding_fn = get_embedding_function(embedding_value=new_embedding_value)
        vector_store.update_embedding(
            ids=vector_store_hash_ids,
            row_ids=vector_store_row_ids,
            filter=vector_store_filters,
            query=vector_store_query,
            embedding_function=embedding_fn,
            embedding_source_tensor=embedding_source_tensor,
        )
        assert_updated_vector_store(
            new_embedding_value,
            vector_store,
            vector_store_hash_ids,
            vector_store_row_ids,
            vector_store_filters,
            vector_store_query,
            embedding_fn,
            embedding_source_tensor,
            embedding_tensor,
            exec_option,
            num_changed_samples=5,
        )
    
        # case 3-4: single embedding_source_tensor, single embedding_tensor, single init_embedding_function
        if init_embedding_function is None:
            # case 3: errors out when init_embedding_function is not specified
            with pytest.raises(ValueError):
                vector_store.update_embedding(
                    ids=vector_store_hash_ids,
                    row_ids=vector_store_row_ids,
                    filter=vector_store_filters,
                    query=vector_store_query,
                    embedding_source_tensor=embedding_source_tensor,
                )
        else:
            # case 4
            vector_store.update_embedding(
                ids=vector_store_hash_ids,
                row_ids=vector_store_row_ids,
                filter=vector_store_filters,
                query=vector_store_query,
                embedding_source_tensor=embedding_source_tensor,
            )
            assert_updated_vector_store(
                0,
                vector_store,
                vector_store_hash_ids,
                vector_store_row_ids,
                vector_store_filters,
                vector_store_query,
                init_embedding_function,
                embedding_source_tensor,
                embedding_tensor,
                exec_option,
                num_changed_samples=5,
            )
    
>       vector_store.delete_by_path(path, token=ds.token)

deeplake/core/vectorstore/test_deeplake_vectorstore.py:1063: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
deeplake/core/vectorstore/deeplake_vectorstore.py:490: in delete_by_path
    deeplake.delete(path, large_ok=True, token=token, force=force, creds=creds)
deeplake/util/spinner.py:151: in inner
    return func(*args, **kwargs)
deeplake/api/dataset.py:905: in delete
    ds.delete(large_ok=large_ok)
deeplake/core/dataset/deeplake_cloud_dataset.py:246: in delete
    self.client.delete_dataset_entry(self.org_id, self.ds_name)
deeplake/client/client.py:306: in delete_dataset_entry
    self.request(
deeplake/client/client.py:148: in request
    check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

response = <Response [400]>

    def check_response_status(response: requests.Response):
        """Check response status and throw corresponding exception on failure."""
        code = response.status_code
        if code >= 200 and code < 300:
            return
    
        try:
            message = response.json()["description"]
        except Exception:
            message = " "
    
        if code == 400:
>           raise BadRequestException(message)
E           deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
E           object generator can't be used in 'await' expression

deeplake/client/utils.py:56: BadRequestException

Check failure on line 1063 in deeplake/core/vectorstore/test_deeplake_vectorstore.py

test_deeplake_vectorstore.test_update_embedding[None-hub_cloud_ds-None-None-None-None-vector_store_query-hub_cloud_dev_token]

deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
object generator can't be used in 'await' expression
Raw output
ds = Dataset(path='hub://testingacc2/tmp75b3_test_deeplake_vectorstore_test_update_embedding-None-hub_cloud_ds-None-None-None-None-vector_store_query-hub_cloud_dev_token-', tensors=[])
vector_store_hash_ids = None, vector_store_row_ids = None
vector_store_filters = None, vector_store_filter_udf = None
vector_store_query = "select * where metadata['a']==1"
init_embedding_function = None
hub_cloud_dev_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'

    @requires_libdeeplake
    @pytest.mark.parametrize(
        "ds, vector_store_hash_ids, vector_store_row_ids, vector_store_filters, vector_store_filter_udf, vector_store_query, hub_cloud_dev_token",
        [
            (
                "local_auth_ds",
                "vector_store_hash_ids",
                None,
                None,
                None,
                None,
                "hub_cloud_dev_token",
            ),
            (
                "local_auth_ds",
                None,
                "vector_store_row_ids",
                None,
                None,
                None,
                "hub_cloud_dev_token",
            ),
            (
                "local_auth_ds",
                None,
                None,
                None,
                "vector_store_filter_udf",
                None,
                "hub_cloud_dev_token",
            ),
            (
                "local_auth_ds",
                None,
                None,
                "vector_store_filters",
                None,
                None,
                "hub_cloud_dev_token",
            ),
            (
                "hub_cloud_ds",
                None,
                None,
                None,
                None,
                "vector_store_query",
                "hub_cloud_dev_token",
            ),
        ],
        indirect=True,
    )
    @pytest.mark.parametrize("init_embedding_function", [embedding_fn3, None])
    @pytest.mark.slow
    @requires_libdeeplake
    def test_update_embedding(
        ds,
        vector_store_hash_ids,
        vector_store_row_ids,
        vector_store_filters,
        vector_store_filter_udf,
        vector_store_query,
        init_embedding_function,
        hub_cloud_dev_token,
    ):
        vector_store_filters = vector_store_filters or vector_store_filter_udf
    
        exec_option = "compute_engine"
        if vector_store_filter_udf:
            exec_option = "python"
    
        embedding_tensor = "embedding"
        embedding_source_tensor = "text"
        # dataset has a single embedding_tensor:
    
        path = ds.path
        vector_store = DeepLakeVectorStore(
            path=path,
            overwrite=True,
            verbose=False,
            exec_option=exec_option,
            embedding_function=init_embedding_function,
            index_params={"threshold": 10},
            token=hub_cloud_dev_token,
        )
    
        # add data to the dataset:
        metadatas[1:6] = [{"a": 1} for _ in range(5)]
        vector_store.add(id=ids, embedding=embeddings, text=texts, metadata=metadatas)
    
        # case 1: single embedding_source_tensor, single embedding_tensor, single embedding_function
        new_embedding_value = 100
        embedding_fn = get_embedding_function(embedding_value=new_embedding_value)
        vector_store.update_embedding(
            ids=vector_store_hash_ids,
            row_ids=vector_store_row_ids,
            filter=vector_store_filters,
            query=vector_store_query,
            embedding_function=embedding_fn,
            embedding_source_tensor=embedding_source_tensor,
            embedding_tensor=embedding_tensor,
        )
        assert_updated_vector_store(
            new_embedding_value,
            vector_store,
            vector_store_hash_ids,
            vector_store_row_ids,
            vector_store_filters,
            vector_store_query,
            embedding_fn,
            embedding_source_tensor,
            embedding_tensor,
            exec_option,
            num_changed_samples=5,
        )
    
        # case 2: single embedding_source_tensor, single embedding_tensor not specified, single embedding_function
        new_embedding_value = 100
        embedding_fn = get_embedding_function(embedding_value=new_embedding_value)
        vector_store.update_embedding(
            ids=vector_store_hash_ids,
            row_ids=vector_store_row_ids,
            filter=vector_store_filters,
            query=vector_store_query,
            embedding_function=embedding_fn,
            embedding_source_tensor=embedding_source_tensor,
        )
        assert_updated_vector_store(
            new_embedding_value,
            vector_store,
            vector_store_hash_ids,
            vector_store_row_ids,
            vector_store_filters,
            vector_store_query,
            embedding_fn,
            embedding_source_tensor,
            embedding_tensor,
            exec_option,
            num_changed_samples=5,
        )
    
        # case 3-4: single embedding_source_tensor, single embedding_tensor, single init_embedding_function
        if init_embedding_function is None:
            # case 3: errors out when init_embedding_function is not specified
            with pytest.raises(ValueError):
                vector_store.update_embedding(
                    ids=vector_store_hash_ids,
                    row_ids=vector_store_row_ids,
                    filter=vector_store_filters,
                    query=vector_store_query,
                    embedding_source_tensor=embedding_source_tensor,
                )
        else:
            # case 4
            vector_store.update_embedding(
                ids=vector_store_hash_ids,
                row_ids=vector_store_row_ids,
                filter=vector_store_filters,
                query=vector_store_query,
                embedding_source_tensor=embedding_source_tensor,
            )
            assert_updated_vector_store(
                0,
                vector_store,
                vector_store_hash_ids,
                vector_store_row_ids,
                vector_store_filters,
                vector_store_query,
                init_embedding_function,
                embedding_source_tensor,
                embedding_tensor,
                exec_option,
                num_changed_samples=5,
            )
    
>       vector_store.delete_by_path(path, token=ds.token)

deeplake/core/vectorstore/test_deeplake_vectorstore.py:1063: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
deeplake/core/vectorstore/deeplake_vectorstore.py:490: in delete_by_path
    deeplake.delete(path, large_ok=True, token=token, force=force, creds=creds)
deeplake/util/spinner.py:151: in inner
    return func(*args, **kwargs)
deeplake/api/dataset.py:905: in delete
    ds.delete(large_ok=large_ok)
deeplake/core/dataset/deeplake_cloud_dataset.py:246: in delete
    self.client.delete_dataset_entry(self.org_id, self.ds_name)
deeplake/client/client.py:306: in delete_dataset_entry
    self.request(
deeplake/client/client.py:148: in request
    check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

response = <Response [400]>

    def check_response_status(response: requests.Response):
        """Check response status and throw corresponding exception on failure."""
        code = response.status_code
        if code >= 200 and code < 300:
            return
    
        try:
            message = response.json()["description"]
        except Exception:
            message = " "
    
        if code == 400:
>           raise BadRequestException(message)
E           deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
E           object generator can't be used in 'await' expression

deeplake/client/utils.py:56: BadRequestException

Check failure on line 2701 in deeplake/core/vectorstore/test_deeplake_vectorstore.py

test_deeplake_vectorstore.test_read_only

deeplake.util.exceptions.InvalidTokenException: Token is invalid. Make sure the full token string is included and try again.
Raw output
self = <deeplake.client.client.DeepLakeBackendClient object at 0x7f041112c690>
org_id = 'davitbun', ds_name = 'twitter-algorithm', mode = 'w'
db_engine = {'enabled': False}, no_cache = False

    def get_dataset_credentials(
        self,
        org_id: str,
        ds_name: str,
        mode: Optional[str] = None,
        db_engine: Optional[dict] = None,
        no_cache: bool = False,
    ):
        """Retrieves temporary 12 hour credentials for the required dataset from the backend.
    
        Args:
            org_id (str): The name of the user/organization to which the dataset belongs.
            ds_name (str): The name of the dataset being accessed.
            mode (str, optional): The mode in which the user has requested to open the dataset.
                If not provided, the backend will set mode to 'a' if user has write permission, else 'r'.
            db_engine (dict, optional): The database engine args to use for the dataset.
            no_cache (bool): If True, cached creds are ignored and new creds are returned. Default False.
    
        Returns:
            tuple: containing full url to dataset, credentials, mode and expiration time respectively.
    
        Raises:
            UserNotLoggedInException: When user is not authenticated
            InvalidTokenException: If the specified token is invalid
            TokenPermissionError: when there are permission or other errors related to token
            AgreementNotAcceptedError: when user has not accepted the agreement
            NotLoggedInAgreementError: when user is not authenticated and dataset has agreement which needs to be signed
        """
        import json
    
        db_engine = db_engine or {}
        relative_url = GET_DATASET_CREDENTIALS_SUFFIX.format(org_id, ds_name)
        try:
>           response = self.request(
                "GET",
                relative_url,
                endpoint=self.endpoint(),
                params={
                    "mode": mode,
                    "no_cache": no_cache,
                    "db_engine": json.dumps(db_engine),
                },
            ).json()

deeplake/client/client.py:196: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
deeplake/client/client.py:148: in request
    check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

response = <Response [403]>

    def check_response_status(response: requests.Response):
        """Check response status and throw corresponding exception on failure."""
        code = response.status_code
        if code >= 200 and code < 300:
            return
    
        try:
            message = response.json()["description"]
        except Exception:
            message = " "
    
        if code == 400:
            raise BadRequestException(message)
        elif response.status_code == 401:
            raise AuthenticationException
        elif response.status_code == 403:
>           raise AuthorizationException(message, response=response)
E           deeplake.util.exceptions.AuthorizationException: You don't have permission to write to this dataset (davitbun/twitter-algorithm). If you have read permissions try accessing it with read_only=True.

deeplake/client/utils.py:60: AuthorizationException

During handling of the above exception, another exception occurred:

self = <jwt.api_jws.PyJWS object at 0x7f04090d3250>
jwt = b'PUBLIC_TOKEN_______________________________________________________________________________________________________________________________________________________'

    def _load(self, jwt: str | bytes) -> tuple[bytes, bytes, dict[str, Any], bytes]:
        if isinstance(jwt, str):
            jwt = jwt.encode("utf-8")
    
        if not isinstance(jwt, bytes):
            raise DecodeError(f"Invalid token type. Token must be a {bytes}")
    
        try:
>           signing_input, crypto_segment = jwt.rsplit(b".", 1)
E           ValueError: not enough values to unpack (expected 2, got 1)

/opt/hostedtoolcache/Python/3.11.9/x64/lib/python3.11/site-packages/jwt/api_jws.py:257: ValueError

The above exception was the direct cause of the following exception:

self = <deeplake.client.client.DeepLakeBackendClient object at 0x7f041112c690>
org_id = 'davitbun', ds_name = 'twitter-algorithm', mode = 'w'
db_engine = {'enabled': False}, no_cache = False

    def get_dataset_credentials(
        self,
        org_id: str,
        ds_name: str,
        mode: Optional[str] = None,
        db_engine: Optional[dict] = None,
        no_cache: bool = False,
    ):
        """Retrieves temporary 12 hour credentials for the required dataset from the backend.
    
        Args:
            org_id (str): The name of the user/organization to which the dataset belongs.
            ds_name (str): The name of the dataset being accessed.
            mode (str, optional): The mode in which the user has requested to open the dataset.
                If not provided, the backend will set mode to 'a' if user has write permission, else 'r'.
            db_engine (dict, optional): The database engine args to use for the dataset.
            no_cache (bool): If True, cached creds are ignored and new creds are returned. Default False.
    
        Returns:
            tuple: containing full url to dataset, credentials, mode and expiration time respectively.
    
        Raises:
            UserNotLoggedInException: When user is not authenticated
            InvalidTokenException: If the specified token is invalid
            TokenPermissionError: when there are permission or other errors related to token
            AgreementNotAcceptedError: when user has not accepted the agreement
            NotLoggedInAgreementError: when user is not authenticated and dataset has agreement which needs to be signed
        """
        import json
    
        db_engine = db_engine or {}
        relative_url = GET_DATASET_CREDENTIALS_SUFFIX.format(org_id, ds_name)
        try:
            response = self.request(
                "GET",
                relative_url,
                endpoint=self.endpoint(),
                params={
                    "mode": mode,
                    "no_cache": no_cache,
                    "db_engine": json.dumps(db_engine),
                },
            ).json()
        except Exception as e:
            if isinstance(e, AuthorizationException):
                response_data = e.response.json()
                code = response_data.get("code")
                if code == 1:
                    agreements = response_data["agreements"]
                    agreements = [agreement["text"] for agreement in agreements]
                    raise AgreementNotAcceptedError(agreements) from e
                elif code == 2:
                    raise NotLoggedInAgreementError from e
                else:
                    try:
>                       jwt.decode(self.token, options={"verify_signature": False})

deeplake/client/client.py:218: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/opt/hostedtoolcache/Python/3.11.9/x64/lib/python3.11/site-packages/jwt/api_jwt.py:210: in decode
    decoded = self.decode_complete(
/opt/hostedtoolcache/Python/3.11.9/x64/lib/python3.11/site-packages/jwt/api_jwt.py:151: in decode_complete
    decoded = api_jws.decode_complete(
/opt/hostedtoolcache/Python/3.11.9/x64/lib/python3.11/site-packages/jwt/api_jws.py:198: in decode_complete
    payload, signing_input, header, signature = self._load(jwt)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <jwt.api_jws.PyJWS object at 0x7f04090d3250>
jwt = b'PUBLIC_TOKEN_______________________________________________________________________________________________________________________________________________________'

    def _load(self, jwt: str | bytes) -> tuple[bytes, bytes, dict[str, Any], bytes]:
        if isinstance(jwt, str):
            jwt = jwt.encode("utf-8")
    
        if not isinstance(jwt, bytes):
            raise DecodeError(f"Invalid token type. Token must be a {bytes}")
    
        try:
            signing_input, crypto_segment = jwt.rsplit(b".", 1)
            header_segment, payload_segment = signing_input.split(b".", 1)
        except ValueError as err:
>           raise DecodeError("Not enough segments") from err
E           jwt.exceptions.DecodeError: Not enough segments

/opt/hostedtoolcache/Python/3.11.9/x64/lib/python3.11/site-packages/jwt/api_jws.py:260: DecodeError

During handling of the above exception, another exception occurred:

    @pytest.mark.slow
    def test_read_only():
>       db = VectorStore("hub://davitbun/twitter-algorithm")

deeplake/core/vectorstore/test_deeplake_vectorstore.py:2701: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
deeplake/core/vectorstore/deeplake_vectorstore.py:120: in __init__
    self.dataset_handler = get_dataset_handler(
deeplake/core/vectorstore/dataset_handlers/dataset_handler.py:13: in get_dataset_handler
    return ClientSideDH(*args, **kwargs)
deeplake/core/vectorstore/dataset_handlers/client_side_dataset_handler.py:66: in __init__
    self.dataset = dataset or dataset_utils.create_or_load_dataset(
deeplake/core/vectorstore/vector_search/dataset/dataset.py:60: in create_or_load_dataset
    return create_dataset(
deeplake/core/vectorstore/vector_search/dataset/dataset.py:180: in create_dataset
    dataset = deeplake.empty(
deeplake/api/dataset.py:452: in empty
    storage, cache_chain = get_storage_and_cache_chain(
deeplake/util/storage.py:242: in get_storage_and_cache_chain
    storage = storage_provider_from_path(
deeplake/util/storage.py:66: in storage_provider_from_path
    storage = storage_provider_from_hub_path(
deeplake/util/storage.py:162: in storage_provider_from_hub_path
    url, final_creds, mode, expiration, repo = get_dataset_credentials(
deeplake/util/storage.py:139: in get_dataset_credentials
    url, final_creds, mode, expiration, repo = client.get_dataset_credentials(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <deeplake.client.client.DeepLakeBackendClient object at 0x7f041112c690>
org_id = 'davitbun', ds_name = 'twitter-algorithm', mode = 'w'
db_engine = {'enabled': False}, no_cache = False

    def get_dataset_credentials(
        self,
        org_id: str,
        ds_name: str,
        mode: Optional[str] = None,
        db_engine: Optional[dict] = None,
        no_cache: bool = False,
    ):
        """Retrieves temporary 12 hour credentials for the required dataset from the backend.
    
        Args:
            org_id (str): The name of the user/organization to which the dataset belongs.
            ds_name (str): The name of the dataset being accessed.
            mode (str, optional): The mode in which the user has requested to open the dataset.
                If not provided, the backend will set mode to 'a' if user has write permission, else 'r'.
            db_engine (dict, optional): The database engine args to use for the dataset.
            no_cache (bool): If True, cached creds are ignored and new creds are returned. Default False.
    
        Returns:
            tuple: containing full url to dataset, credentials, mode and expiration time respectively.
    
        Raises:
            UserNotLoggedInException: When user is not authenticated
            InvalidTokenException: If the specified token is invalid
            TokenPermissionError: when there are permission or other errors related to token
            AgreementNotAcceptedError: when user has not accepted the agreement
            NotLoggedInAgreementError: when user is not authenticated and dataset has agreement which needs to be signed
        """
        import json
    
        db_engine = db_engine or {}
        relative_url = GET_DATASET_CREDENTIALS_SUFFIX.format(org_id, ds_name)
        try:
            response = self.request(
                "GET",
                relative_url,
                endpoint=self.endpoint(),
                params={
                    "mode": mode,
                    "no_cache": no_cache,
                    "db_engine": json.dumps(db_engine),
                },
            ).json()
        except Exception as e:
            if isinstance(e, AuthorizationException):
                response_data = e.response.json()
                code = response_data.get("code")
                if code == 1:
                    agreements = response_data["agreements"]
                    agreements = [agreement["text"] for agreement in agreements]
                    raise AgreementNotAcceptedError(agreements) from e
                elif code == 2:
                    raise NotLoggedInAgreementError from e
                else:
                    try:
                        jwt.decode(self.token, options={"verify_signature": False})
                    except Exception:
>                       raise InvalidTokenException
E                       deeplake.util.exceptions.InvalidTokenException: Token is invalid. Make sure the full token string is included and try again.

deeplake/client/client.py:220: InvalidTokenException
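
Note: the failure above is a token-format problem rather than a permissions problem. The placeholder public token contains no '.' separators, so PyJWT cannot split it into header/payload/signature segments; jwt.decode raises DecodeError("Not enough segments"), and the client surfaces that as InvalidTokenException. A minimal sketch of the underlying PyJWT behaviour, assuming a hypothetical placeholder token string (this is not the deeplake client code itself):

    import jwt  # PyJWT

    token = "PUBLIC_TOKEN_____"  # hypothetical placeholder, not a real credential

    try:
        # Without the two "." separators of a JWT, decoding fails before any
        # signature check, even with verification disabled.
        jwt.decode(token, options={"verify_signature": False})
    except jwt.exceptions.DecodeError as err:
        # The deeplake client wraps this case in its own InvalidTokenException;
        # only the underlying PyJWT behaviour is shown here.
        print(f"decode failed: {err}")  # decode failed: Not enough segments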

Check failure on line 189 in deeplake/core/vectorstore/deep_memory/test_deepmemory.py

See this annotation in the file changed.

@github-actions github-actions / JUnit Test Report

test_deepmemory.test_deepmemory_evaluate

AssertionError: assert {'recall@1': ...@3': 0.4, ...} == {'recall@1': ...@3': 0.6, ...}
  Omitting 4 identical items, use -vv to show
  Differing items:
  {'recall@1': 0.2} != {'recall@1': 0.4}
  {'recall@3': 0.4} != {'recall@3': 0.6}
  Full diff:
    {
  -  'recall@1': 0.4,
  ?                ^
  +  'recall@1': 0.2,
  ?                ^
     'recall@10': 0.6,
     'recall@100': 0.9,
  -  'recall@3': 0.6,
  ?                ^
  +  'recall@3': 0.4,
  ?                ^
     'recall@5': 0.6,
     'recall@50': 0.7,
    }
Raw output
corpus_query_relevances_copy = ('hub://testingacc2/tmp75b3_test_deepmemory_test_deepmemory_evaluate', ['0-dimensional biomaterials lack inductive pro...5107', 1]], [['32587939', 1]], ...], 'hub://testingacc2/tmp75b3_test_deepmemory_test_deepmemory_evaluate_eval_queries')
questions_embeddings_and_relevances = (array([[-0.01518817,  0.02033963, -0.01228631, ..., -0.00286692,
        -0.0079668 , -0.00414979],
       [-0.003503...A treatment decreases endoplasmic reticulum stress in response to general endoplasmic reticulum stress markers.', ...])
hub_cloud_dev_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'

    @pytest.mark.slow
    @pytest.mark.timeout(600)
    @pytest.mark.skipif(sys.platform == "win32", reason="Does not run on Windows")
    @requires_libdeeplake
    def test_deepmemory_evaluate(
        corpus_query_relevances_copy,
        questions_embeddings_and_relevances,
        hub_cloud_dev_token,
    ):
        corpus, _, _, query_path = corpus_query_relevances_copy
        (
            questions_embeddings,
            question_relevances,
            queries,
        ) = questions_embeddings_and_relevances
    
        db = VectorStore(
            corpus,
            runtime={"tensor_db": True},
            token=hub_cloud_dev_token,
        )
    
        # when qvs_params is wrong:
        with pytest.raises(ValueError):
            db.deep_memory.evaluate(
                queries=queries,
                embedding=questions_embeddings,
                relevance=question_relevances,
                qvs_params={
                    "log_queries": True,
                    "branch_name": "wrong_branch",
                },
            )
    
        # embedding_function is not provided in the constructor or in the eval method
        with pytest.raises(ValueError):
            db.deep_memory.evaluate(
                queries=queries,
                relevance=question_relevances,
                qvs_params={
                    "log_queries": True,
                    "branch_name": "wrong_branch",
                },
            )
    
        recall = db.deep_memory.evaluate(
            queries=queries,
            embedding=questions_embeddings,
            relevance=question_relevances,
            qvs_params={
                "branch": "queries",
            },
        )
    
>       assert recall["without model"] == {
            "recall@1": 0.4,
            "recall@3": 0.6,
            "recall@5": 0.6,
            "recall@10": 0.6,
            "recall@50": 0.7,
            "recall@100": 0.9,
        }
E       AssertionError: assert {'recall@1': ...@3': 0.4, ...} == {'recall@1': ...@3': 0.6, ...}
E         Omitting 4 identical items, use -vv to show
E         Differing items:
E         {'recall@1': 0.2} != {'recall@1': 0.4}
E         {'recall@3': 0.4} != {'recall@3': 0.6}
E         Full diff:
E           {
E         -  'recall@1': 0.4,
E         ?                ^
E         +  'recall@1': 0.2,
E         ?                ^
E            'recall@10': 0.6,
E            'recall@100': 0.9,
E         -  'recall@3': 0.6,
E         ?                ^
E         +  'recall@3': 0.4,
E         ?                ^
E            'recall@5': 0.6,
E            'recall@50': 0.7,
E           }

deeplake/core/vectorstore/deep_memory/test_deepmemory.py:189: AssertionError
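
Note: this failure is an exact-equality assertion on recall metrics that came back slightly lower than expected (recall@1 0.2 vs 0.4, recall@3 0.4 vs 0.6) while the other four values matched. If the evaluation is not fully deterministic between runs, a tolerance-based comparison is one way such a check could be made less brittle. A minimal sketch, assuming the dict shape returned by db.deep_memory.evaluate above and an illustrative tolerance (not how the test suite currently asserts):

    import pytest

    # Expected values mirrored from the assertion above; the tolerance is illustrative.
    EXPECTED_RECALL = {
        "recall@1": 0.4,
        "recall@3": 0.6,
        "recall@5": 0.6,
        "recall@10": 0.6,
        "recall@50": 0.7,
        "recall@100": 0.9,
    }

    def assert_recall_close(recall: dict, expected: dict = EXPECTED_RECALL, abs_tol: float = 0.25) -> None:
        """Assert every recall@k value is within abs_tol of its expected value."""
        for key, value in expected.items():
            assert recall[key] == pytest.approx(value, abs=abs_tol), key

    # Hypothetical usage inside the test:
    # assert_recall_close(recall["without model"])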