Merge pull request #2825 from activeloopai/docs_edits #1545

GitHub Actions / JUnit Test Report failed Apr 16, 2024 in 0s

22436 tests run, 11797 passed, 10632 skipped, 7 failed.

Annotations

Check failure on line 17 in deeplake/util/tests/test_read.py

github-actions / JUnit Test Report

test_read.test[3.9.0]

deeplake.util.exceptions.DatasetHandlerError: A Deep Lake dataset does not exist at the given path (./datasets/3_9_0). Check the path provided or in case you want to create a new dataset, use deeplake.empty().
Raw output
version = '3.9.0', request = <FixtureRequest for <Function test[3.9.0]>>

    @versions
    def test(version, request):
        assert_version(version)
>       ds = load_dataset(version)

buH/buh/tests/test_read.py:17: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
buH/buh/tests/common.py:86: in load_dataset
    return _bc_load_dataset(dataset_path)
buH/buh/tests/common.py:79: in _bc_load_dataset
    return loader(path)
buH/buh/tests/common.py:58: in _load1
    return hub.load(path)
deeplake/util/spinner.py:151: in inner
    return func(*args, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

path = './datasets/3_9_0', read_only = None, memory_cache_size = 2000
local_cache_size = 0, creds = {}, token = None, org_id = None, verbose = True
access_method = 'stream', unlink = False, reset = False, indra = False
check_integrity = None, lock_timeout = 0, lock_enabled = True
index_params = None

    @staticmethod
    @spinner
    def load(
        path: Union[str, pathlib.Path],
        read_only: Optional[bool] = None,
        memory_cache_size: int = DEFAULT_MEMORY_CACHE_SIZE,
        local_cache_size: int = DEFAULT_LOCAL_CACHE_SIZE,
        creds: Optional[Union[dict, str]] = None,
        token: Optional[str] = None,
        org_id: Optional[str] = None,
        verbose: bool = True,
        access_method: str = "stream",
        unlink: bool = False,
        reset: bool = False,
        indra: bool = USE_INDRA,
        check_integrity: Optional[bool] = None,
        lock_timeout: Optional[int] = 0,
        lock_enabled: Optional[bool] = True,
        index_params: Optional[Dict[str, Union[int, str]]] = None,
    ) -> Dataset:
        """Loads an existing dataset
    
        Examples:
    
            >>> ds = deeplake.load("hub://username/dataset")
            >>> ds = deeplake.load("s3://mybucket/my_dataset")
            >>> ds = deeplake.load("./datasets/my_dataset", overwrite=True)
    
            Loading to a specific version:
    
            >>> ds = deeplake.load("hub://username/dataset@new_branch")
            >>> ds = deeplake.load("hub://username/dataset@3e49cded62b6b335c74ff07e97f8451a37aca7b2)
    
            >>> my_commit_id = "3e49cded62b6b335c74ff07e97f8451a37aca7b2"
            >>> ds = deeplake.load(f"hub://username/dataset@{my_commit_id}")
    
        Args:
            path (str, pathlib.Path): - The full path to the dataset. Can be:
                - a Deep Lake cloud path of the form ``hub://username/datasetname``. To write to Deep Lake cloud datasets, ensure that you are authenticated to Deep Lake (pass in a token using the 'token' parameter).
                - an s3 path of the form ``s3://bucketname/path/to/dataset``. Credentials are required in either the environment or passed to the creds argument.
                - a local file system path of the form ``./path/to/dataset`` or ``~/path/to/dataset`` or ``path/to/dataset``.
                - a memory path of the form ``mem://path/to/dataset`` which doesn't save the dataset but keeps it in memory instead. Should be used only for testing as it does not persist.
                - Loading to a specific version:
    
                        - You can also specify a ``commit_id`` or ``branch`` to load the dataset to that version directly by using the ``@`` symbol.
                        - The path will then be of the form ``hub://username/dataset@{branch}`` or ``hub://username/dataset@{commit_id}``.
                        - See examples above.
            read_only (bool, optional): Opens dataset in read only mode if this is passed as ``True``. Defaults to ``False``.
                Datasets stored on Deep Lake cloud that your account does not have write access to will automatically open in read mode.
            memory_cache_size (int): The size of the memory cache to be used in MB.
            local_cache_size (int): The size of the local filesystem cache to be used in MB.
            creds (dict, str, optional): The string ``ENV`` or a dictionary containing credentials used to access the dataset at the path.
                - If 'aws_access_key_id', 'aws_secret_access_key', 'aws_session_token' are present, these take precedence over credentials present in the environment or in credentials file. Currently only works with s3 paths.
                - It supports 'aws_access_key_id', 'aws_secret_access_key', 'aws_session_token', 'endpoint_url', 'aws_region', 'profile_name' as keys.
                - If 'ENV' is passed, credentials are fetched from the environment variables. This is also the case when creds is not passed for cloud datasets. For datasets connected to hub cloud, specifying 'ENV' will override the credentials fetched from Activeloop and use local ones.
            token (str, optional): Activeloop token, used for fetching credentials to the dataset at path if it is a Deep Lake dataset. This is optional, tokens are normally autogenerated.
            org_id (str, Optional): Organization id to be used for enabling high-performance features. Only applicable for local datasets.
            verbose (bool): If ``True``, logs will be printed. Defaults to ``True``.
            access_method (str): The access method to use for the dataset. Can be:
    
                    - 'stream'
    
                        - Streams the data from the dataset i.e. only fetches data when required. This is the default value.
    
                    - 'download'
    
                        - Downloads the data to the local filesystem to the path specified in environment variable ``DEEPLAKE_DOWNLOAD_PATH``.
                          This will overwrite any local copy of the dataset already present at that path.
                        - Raises an exception if ``DEEPLAKE_DOWNLOAD_PATH`` environment variable is not set or if the dataset does not exist.
                        - The 'download' access method can be modified to specify num_workers and/or scheduler.
                          For example: 'download:2:processed' will use 2 workers and use processed scheduler, while 'download:3' will use 3 workers and
                          default scheduler (threaded), and 'download:processed' will use a single worker and use processed scheduler.
    
                    - 'local'
    
                        - Downloads the dataset if it doesn't already exist, otherwise loads from local storage.
                        - Raises an exception if ``DEEPLAKE_DOWNLOAD_PATH`` environment variable is not set.
                        - The 'local' access method can be modified to specify num_workers and/or scheduler to be used in case dataset needs to be downloaded.
                          If dataset needs to be downloaded, 'local:2:processed' will use 2 workers and use processed scheduler, while 'local:3' will use 3 workers
                          and default scheduler (threaded), and 'local:processed' will use a single worker and use processed scheduler.
            unlink (bool): Downloads linked samples if set to ``True``. Only applicable if ``access_method`` is ``download`` or ``local``. Defaults to ``False``.
            reset (bool): If the specified dataset cannot be loaded due to a corrupted HEAD state of the branch being loaded,
                          setting ``reset=True`` will reset HEAD changes and load the previous version.
            check_integrity (bool, Optional): Performs an integrity check by default (None) if the dataset has 20 or fewer tensors.
                                              Set to ``True`` to force integrity check, ``False`` to skip integrity check.
            indra (bool): Flag indicating whether the indra API should be used to load the dataset. Defaults to ``False``.
    
        ..
            # noqa: DAR101
    
        Returns:
            Dataset: Dataset loaded using the arguments provided.
    
        Raises:
            DatasetHandlerError: If a Dataset does not exist at the given path.
            AgreementError: When agreement is rejected
            UserNotLoggedInException: When user is not authenticated
            InvalidTokenException: If the specified token is invalid
            TokenPermissionError: When there are permission or other errors related to token
            CheckoutError: If version address specified in the path cannot be found
            DatasetCorruptError: If loading the dataset failed due to corruption and ``reset`` is not ``True``
            ReadOnlyModeError: If reset is attempted in read-only mode
            LockedException: When attempting to open a dataset for writing when it is locked by another machine
            ValueError: If ``org_id`` is specified for a non-local dataset
            Exception: Re-raises caught exception if reset cannot fix the issue
    
        Warning:
            Setting ``access_method`` to download will overwrite the local copy of the dataset if it was previously downloaded.
    
        Note:
            Any changes made to the dataset in download / local mode will only be made to the local copy and will not be reflected in the original dataset.
        """
        access_method, num_workers, scheduler = parse_access_method(access_method)
        check_access_method(access_method, overwrite=False, unlink=unlink)
    
        path, address = process_dataset_path(path)
    
        if creds is None:
            creds = {}
    
        if org_id is not None and get_path_type(path) != "local":
            raise ValueError("org_id parameter can only be used with local datasets")
    
        try:
            storage, cache_chain = get_storage_and_cache_chain(
                path=path,
                read_only=read_only,
                creds=creds,
                token=token,
                memory_cache_size=memory_cache_size,
                local_cache_size=local_cache_size,
                indra=indra,
            )
            feature_report_path(
                path,
                "load",
                {
                    "lock_enabled": lock_enabled,
                    "lock_timeout": lock_timeout,
                    "index_params": index_params,
                },
                token=token,
            )
        except Exception as e:
            if isinstance(e, UserNotLoggedInException):
                raise UserNotLoggedInException from None
            raise
        if not dataset_exists(cache_chain):
>           raise DatasetHandlerError(
                f"A Deep Lake dataset does not exist at the given path ({path}). Check the path provided or in case you want to create a new dataset, use deeplake.empty()."
            )
E           deeplake.util.exceptions.DatasetHandlerError: A Deep Lake dataset does not exist at the given path (./datasets/3_9_0). Check the path provided or in case you want to create a new dataset, use deeplake.empty().

deeplake/api/dataset.py:654: DatasetHandlerError
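
The failure above is a missing-fixture problem rather than a bug in deeplake.load itself: the error message already points at the usual remediation, namely checking that a dataset exists at the path before loading, or creating one with deeplake.empty(). A minimal sketch of that guard, assuming the local fixture layout used by the buH tests (the helper name load_or_create is hypothetical):

    import deeplake
    from deeplake.util.exceptions import DatasetHandlerError

    def load_or_create(path: str):
        """Load a Deep Lake dataset, falling back to a fresh empty dataset if none exists."""
        try:
            return deeplake.load(path)
        except DatasetHandlerError:
            # No dataset at `path` (the situation in the failing test); create one instead.
            return deeplake.empty(path)

    ds = load_or_create("./datasets/3_9_0")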

Check failure on line 25 in test_write

github-actions / JUnit Test Report

test_write.test_new_samples[3.9.0]

FileNotFoundError: [Errno 2] No such file or directory: './datasets/3_9_0'
Raw output
version = '3.9.0'
request = <FixtureRequest for <Function test_new_samples[3.9.0]>>

    @versions
    def test_new_samples(version, request):
        assert_version(version)
>       ds = load_dataset_copy(version, overwrite=True)

buH/buh/tests/test_write.py:25: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
buH/buh/tests/common.py:96: in load_dataset_copy
    new_path = shutil.copytree(dataset_path, new_dataset_path)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

src = './datasets/3_9_0', dst = './datasets/3_9_0_ffw3_9_1', symlinks = False
ignore = None, copy_function = <function copy2 at 0x7f0a72b27d90>
ignore_dangling_symlinks = False, dirs_exist_ok = False

    def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
                 ignore_dangling_symlinks=False, dirs_exist_ok=False):
        """Recursively copy a directory tree and return the destination directory.
    
        If exception(s) occur, an Error is raised with a list of reasons.
    
        If the optional symlinks flag is true, symbolic links in the
        source tree result in symbolic links in the destination tree; if
        it is false, the contents of the files pointed to by symbolic
        links are copied. If the file pointed by the symlink doesn't
        exist, an exception will be added in the list of errors raised in
        an Error exception at the end of the copy process.
    
        You can set the optional ignore_dangling_symlinks flag to true if you
        want to silence this exception. Notice that this has no effect on
        platforms that don't support os.symlink.
    
        The optional ignore argument is a callable. If given, it
        is called with the `src` parameter, which is the directory
        being visited by copytree(), and `names` which is the list of
        `src` contents, as returned by os.listdir():
    
            callable(src, names) -> ignored_names
    
        Since copytree() is called recursively, the callable will be
        called once for each directory that is copied. It returns a
        list of names relative to the `src` directory that should
        not be copied.
    
        The optional copy_function argument is a callable that will be used
        to copy each file. It will be called with the source path and the
        destination path as arguments. By default, copy2() is used, but any
        function that supports the same signature (like copy()) can be used.
    
        If dirs_exist_ok is false (the default) and `dst` already exists, a
        `FileExistsError` is raised. If `dirs_exist_ok` is true, the copying
        operation will continue if it encounters existing directories, and files
        within the `dst` tree will be overwritten by corresponding files from the
        `src` tree.
        """
        sys.audit("shutil.copytree", src, dst)
>       with os.scandir(src) as itr:
E       FileNotFoundError: [Errno 2] No such file or directory: './datasets/3_9_0'

/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/shutil.py:557: FileNotFoundError
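
This test_write failure (and the two identical ones that follow) is downstream of the same missing ./datasets/3_9_0 directory from the test_read failure above: load_dataset_copy calls shutil.copytree on a source that was never generated. A small sketch of a more defensive copy, assuming the fixture paths shown in the traceback (copy_fixture is a hypothetical helper; dirs_exist_ok requires Python 3.8+):

    import os
    import shutil

    def copy_fixture(src: str, dst: str) -> str:
        """Copy a dataset fixture directory, failing with a clearer message if it is absent."""
        if not os.path.isdir(src):
            # shutil.copytree raises FileNotFoundError when `src` is missing;
            # surface the problem explicitly before attempting the copy.
            raise FileNotFoundError(f"dataset fixture missing: {src}")
        # dirs_exist_ok=True lets a re-run overwrite a stale destination copy in place.
        return shutil.copytree(src, dst, dirs_exist_ok=True)

    # new_path = copy_fixture("./datasets/3_9_0", "./datasets/3_9_0_ffw3_9_1")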

Check failure on line 53 in test_write

github-actions / JUnit Test Report

test_write.test_new_tensor[3.9.0]

FileNotFoundError: [Errno 2] No such file or directory: './datasets/3_9_0'
Raw output
version = '3.9.0'
request = <FixtureRequest for <Function test_new_tensor[3.9.0]>>

    @versions
    def test_new_tensor(version, request):
        assert_version(version)
>       ds = load_dataset_copy(version, overwrite=True)

buH/buh/tests/test_write.py:53: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
buH/buh/tests/common.py:96: in load_dataset_copy
    new_path = shutil.copytree(dataset_path, new_dataset_path)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

src = './datasets/3_9_0', dst = './datasets/3_9_0_ffw3_9_1', symlinks = False
ignore = None, copy_function = <function copy2 at 0x7f0a72b27d90>
ignore_dangling_symlinks = False, dirs_exist_ok = False

    def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
                 ignore_dangling_symlinks=False, dirs_exist_ok=False):
        """Recursively copy a directory tree and return the destination directory.
    
        If exception(s) occur, an Error is raised with a list of reasons.
    
        If the optional symlinks flag is true, symbolic links in the
        source tree result in symbolic links in the destination tree; if
        it is false, the contents of the files pointed to by symbolic
        links are copied. If the file pointed by the symlink doesn't
        exist, an exception will be added in the list of errors raised in
        an Error exception at the end of the copy process.
    
        You can set the optional ignore_dangling_symlinks flag to true if you
        want to silence this exception. Notice that this has no effect on
        platforms that don't support os.symlink.
    
        The optional ignore argument is a callable. If given, it
        is called with the `src` parameter, which is the directory
        being visited by copytree(), and `names` which is the list of
        `src` contents, as returned by os.listdir():
    
            callable(src, names) -> ignored_names
    
        Since copytree() is called recursively, the callable will be
        called once for each directory that is copied. It returns a
        list of names relative to the `src` directory that should
        not be copied.
    
        The optional copy_function argument is a callable that will be used
        to copy each file. It will be called with the source path and the
        destination path as arguments. By default, copy2() is used, but any
        function that supports the same signature (like copy()) can be used.
    
        If dirs_exist_ok is false (the default) and `dst` already exists, a
        `FileExistsError` is raised. If `dirs_exist_ok` is true, the copying
        operation will continue if it encounters existing directories, and files
        within the `dst` tree will be overwritten by corresponding files from the
        `src` tree.
        """
        sys.audit("shutil.copytree", src, dst)
>       with os.scandir(src) as itr:
E       FileNotFoundError: [Errno 2] No such file or directory: './datasets/3_9_0'

/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/shutil.py:557: FileNotFoundError

Check failure on line 64 in test_write

github-actions / JUnit Test Report

test_write.test_update_samples[3.9.0]

FileNotFoundError: [Errno 2] No such file or directory: './datasets/3_9_0'
Raw output
version = '3.9.0'
request = <FixtureRequest for <Function test_update_samples[3.9.0]>>

    @versions
    def test_update_samples(version, request):
        assert_version(version)
>       ds = load_dataset_copy(version, overwrite=True)

buH/buh/tests/test_write.py:64: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
buH/buh/tests/common.py:96: in load_dataset_copy
    new_path = shutil.copytree(dataset_path, new_dataset_path)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

src = './datasets/3_9_0', dst = './datasets/3_9_0_ffw3_9_1', symlinks = False
ignore = None, copy_function = <function copy2 at 0x7f0a72b27d90>
ignore_dangling_symlinks = False, dirs_exist_ok = False

    def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
                 ignore_dangling_symlinks=False, dirs_exist_ok=False):
        """Recursively copy a directory tree and return the destination directory.
    
        If exception(s) occur, an Error is raised with a list of reasons.
    
        If the optional symlinks flag is true, symbolic links in the
        source tree result in symbolic links in the destination tree; if
        it is false, the contents of the files pointed to by symbolic
        links are copied. If the file pointed by the symlink doesn't
        exist, an exception will be added in the list of errors raised in
        an Error exception at the end of the copy process.
    
        You can set the optional ignore_dangling_symlinks flag to true if you
        want to silence this exception. Notice that this has no effect on
        platforms that don't support os.symlink.
    
        The optional ignore argument is a callable. If given, it
        is called with the `src` parameter, which is the directory
        being visited by copytree(), and `names` which is the list of
        `src` contents, as returned by os.listdir():
    
            callable(src, names) -> ignored_names
    
        Since copytree() is called recursively, the callable will be
        called once for each directory that is copied. It returns a
        list of names relative to the `src` directory that should
        not be copied.
    
        The optional copy_function argument is a callable that will be used
        to copy each file. It will be called with the source path and the
        destination path as arguments. By default, copy2() is used, but any
        function that supports the same signature (like copy()) can be used.
    
        If dirs_exist_ok is false (the default) and `dst` already exists, a
        `FileExistsError` is raised. If `dirs_exist_ok` is true, the copying
        operation will continue if it encounters existing directories, and files
        within the `dst` tree will be overwritten by corresponding files from the
        `src` tree.
        """
        sys.audit("shutil.copytree", src, dst)
>       with os.scandir(src) as itr:
E       FileNotFoundError: [Errno 2] No such file or directory: './datasets/3_9_0'

/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/shutil.py:557: FileNotFoundError

Check failure on line 471 in deeplake/core/vectorstore/deep_memory/test_deepmemory.py

github-actions / JUnit Test Report

test_deepmemory.test_deepmemory_evaluate_without_embedding_function

deeplake.util.exceptions.InvalidTokenException: Token is invalid. Make sure the full token string is included and try again.
Raw output
self = <deeplake.client.client.DeepLakeBackendClient object at 0x7ffac429ee60>
org_id = 'testingacc2'
ds_name = 'tmpb627_test_deepmemory_test_deepmemory_evaluate_without_embedding_function'
mode = None, db_engine = {'enabled': False}, no_cache = False

    def get_dataset_credentials(
        self,
        org_id: str,
        ds_name: str,
        mode: Optional[str] = None,
        db_engine: Optional[dict] = None,
        no_cache: bool = False,
    ):
        """Retrieves temporary 12 hour credentials for the required dataset from the backend.
    
        Args:
            org_id (str): The name of the user/organization to which the dataset belongs.
            ds_name (str): The name of the dataset being accessed.
            mode (str, optional): The mode in which the user has requested to open the dataset.
                If not provided, the backend will set mode to 'a' if user has write permission, else 'r'.
            db_engine (dict, optional): The database engine args to use for the dataset.
            no_cache (bool): If True, cached creds are ignored and new creds are returned. Default False.
    
        Returns:
            tuple: containing full url to dataset, credentials, mode and expiration time respectively.
    
        Raises:
            UserNotLoggedInException: When user is not authenticated
            InvalidTokenException: If the specified token is invalid
            TokenPermissionError: when there are permission or other errors related to token
            AgreementNotAcceptedError: when user has not accepted the agreement
            NotLoggedInAgreementError: when user is not authenticated and dataset has agreement which needs to be signed
        """
        import json
    
        db_engine = db_engine or {}
        relative_url = GET_DATASET_CREDENTIALS_SUFFIX.format(org_id, ds_name)
        try:
>           response = self.request(
                "GET",
                relative_url,
                endpoint=self.endpoint(),
                params={
                    "mode": mode,
                    "no_cache": no_cache,
                    "db_engine": json.dumps(db_engine),
                },
            ).json()

deeplake/client/client.py:196: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
deeplake/client/client.py:148: in request
    check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

response = <Response [403]>

    def check_response_status(response: requests.Response):
        """Check response status and throw corresponding exception on failure."""
        code = response.status_code
        if code >= 200 and code < 300:
            return
    
        try:
            message = response.json()["description"]
        except Exception:
            message = " "
    
        if code == 400:
            raise BadRequestException(message)
        elif response.status_code == 401:
            raise AuthenticationException
        elif response.status_code == 403:
>           raise AuthorizationException(message, response=response)
E           deeplake.util.exceptions.AuthorizationException: You don't have permission to write to this dataset (testingacc2/tmpb627_test_deepmemory_test_deepmemory_evaluate_without_embedding_function). If you have read permissions try accessing it with read_only=True.

deeplake/client/utils.py:60: AuthorizationException

During handling of the above exception, another exception occurred:

self = <deeplake.client.client.DeepLakeBackendClient object at 0x7ffac429ee60>
org_id = 'testingacc2'
ds_name = 'tmpb627_test_deepmemory_test_deepmemory_evaluate_without_embedding_function'
mode = None, db_engine = {'enabled': False}, no_cache = False

    def get_dataset_credentials(
        self,
        org_id: str,
        ds_name: str,
        mode: Optional[str] = None,
        db_engine: Optional[dict] = None,
        no_cache: bool = False,
    ):
        """Retrieves temporary 12 hour credentials for the required dataset from the backend.
    
        Args:
            org_id (str): The name of the user/organization to which the dataset belongs.
            ds_name (str): The name of the dataset being accessed.
            mode (str, optional): The mode in which the user has requested to open the dataset.
                If not provided, the backend will set mode to 'a' if user has write permission, else 'r'.
            db_engine (dict, optional): The database engine args to use for the dataset.
            no_cache (bool): If True, cached creds are ignored and new creds are returned. Default False.
    
        Returns:
            tuple: containing full url to dataset, credentials, mode and expiration time respectively.
    
        Raises:
            UserNotLoggedInException: When user is not authenticated
            InvalidTokenException: If the specified token is invalid
            TokenPermissionError: when there are permission or other errors related to token
            AgreementNotAcceptedError: when user has not accepted the agreement
            NotLoggedInAgreementError: when user is not authenticated and dataset has agreement which needs to be signed
        """
        import json
    
        db_engine = db_engine or {}
        relative_url = GET_DATASET_CREDENTIALS_SUFFIX.format(org_id, ds_name)
        try:
            response = self.request(
                "GET",
                relative_url,
                endpoint=self.endpoint(),
                params={
                    "mode": mode,
                    "no_cache": no_cache,
                    "db_engine": json.dumps(db_engine),
                },
            ).json()
        except Exception as e:
            if isinstance(e, AuthorizationException):
                response_data = e.response.json()
                code = response_data.get("code")
                if code == 1:
                    agreements = response_data["agreements"]
                    agreements = [agreement["text"] for agreement in agreements]
                    raise AgreementNotAcceptedError(agreements) from e
                elif code == 2:
                    raise NotLoggedInAgreementError from e
                else:
                    try:
>                       jwt.decode(self.token, options={"verify_signature": False})
E                       AttributeError: 'DeepLakeBackendClient' object has no attribute 'token'

deeplake/client/client.py:218: AttributeError

During handling of the above exception, another exception occurred:

corpus_query_relevances_copy = ('hub://testingacc2/tmpb627_test_deepmemory_test_deepmemory_evaluate_without_embedding_function', ['0-dimensional biom...]], ...], 'hub://testingacc2/tmpb627_test_deepmemory_test_deepmemory_evaluate_without_embedding_function_eval_queries')
questions_embeddings_and_relevances = (array([[-0.01518817,  0.02033963, -0.01228631, ..., -0.00286692,
        -0.0079668 , -0.00414979],
       [-0.003503...A treatment decreases endoplasmic reticulum stress in response to general endoplasmic reticulum stress markers.', ...])
hub_cloud_dev_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'

    @pytest.mark.slow
    @pytest.mark.timeout(600)
    @pytest.mark.skipif(sys.platform == "win32", reason="Does not run on Windows")
    def test_deepmemory_evaluate_without_embedding_function(
        corpus_query_relevances_copy,
        questions_embeddings_and_relevances,
        hub_cloud_dev_token,
    ):
        corpus, _, _, query_path = corpus_query_relevances_copy
        (
            questions_embeddings,
            question_relevances,
            queries,
        ) = questions_embeddings_and_relevances
    
>       db = VectorStore(
            corpus,
            runtime={"tensor_db": True},
            token=hub_cloud_dev_token,
        )

deeplake/core/vectorstore/deep_memory/test_deepmemory.py:471: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
deeplake/core/vectorstore/deeplake_vectorstore.py:120: in __init__
    self.dataset_handler = get_dataset_handler(
deeplake/core/vectorstore/dataset_handlers/dataset_handler.py:13: in get_dataset_handler
    return ClientSideDH(*args, **kwargs)
deeplake/core/vectorstore/dataset_handlers/client_side_dataset_handler.py:66: in __init__
    self.dataset = dataset or dataset_utils.create_or_load_dataset(
deeplake/core/vectorstore/vector_search/dataset/dataset.py:49: in create_or_load_dataset
    return load_dataset(
deeplake/core/vectorstore/vector_search/dataset/dataset.py:99: in load_dataset
    dataset = deeplake.load(
deeplake/util/spinner.py:153: in inner
    return func(*args, **kwargs)
deeplake/api/dataset.py:630: in load
    storage, cache_chain = get_storage_and_cache_chain(
deeplake/util/storage.py:242: in get_storage_and_cache_chain
    storage = storage_provider_from_path(
deeplake/util/storage.py:66: in storage_provider_from_path
    storage = storage_provider_from_hub_path(
deeplake/util/storage.py:162: in storage_provider_from_hub_path
    url, final_creds, mode, expiration, repo = get_dataset_credentials(
deeplake/util/storage.py:139: in get_dataset_credentials
    url, final_creds, mode, expiration, repo = client.get_dataset_credentials(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <deeplake.client.client.DeepLakeBackendClient object at 0x7ffac429ee60>
org_id = 'testingacc2'
ds_name = 'tmpb627_test_deepmemory_test_deepmemory_evaluate_without_embedding_function'
mode = None, db_engine = {'enabled': False}, no_cache = False

    def get_dataset_credentials(
        self,
        org_id: str,
        ds_name: str,
        mode: Optional[str] = None,
        db_engine: Optional[dict] = None,
        no_cache: bool = False,
    ):
        """Retrieves temporary 12 hour credentials for the required dataset from the backend.
    
        Args:
            org_id (str): The name of the user/organization to which the dataset belongs.
            ds_name (str): The name of the dataset being accessed.
            mode (str, optional): The mode in which the user has requested to open the dataset.
                If not provided, the backend will set mode to 'a' if user has write permission, else 'r'.
            db_engine (dict, optional): The database engine args to use for the dataset.
            no_cache (bool): If True, cached creds are ignored and new creds are returned. Default False.
    
        Returns:
            tuple: containing full url to dataset, credentials, mode and expiration time respectively.
    
        Raises:
            UserNotLoggedInException: When user is not authenticated
            InvalidTokenException: If the specified token is invalid
            TokenPermissionError: when there are permission or other errors related to token
            AgreementNotAcceptedError: when user has not accepted the agreement
            NotLoggedInAgreementError: when user is not authenticated and dataset has agreement which needs to be signed
        """
        import json
    
        db_engine = db_engine or {}
        relative_url = GET_DATASET_CREDENTIALS_SUFFIX.format(org_id, ds_name)
        try:
            response = self.request(
                "GET",
                relative_url,
                endpoint=self.endpoint(),
                params={
                    "mode": mode,
                    "no_cache": no_cache,
                    "db_engine": json.dumps(db_engine),
                },
            ).json()
        except Exception as e:
            if isinstance(e, AuthorizationException):
                response_data = e.response.json()
                code = response_data.get("code")
                if code == 1:
                    agreements = response_data["agreements"]
                    agreements = [agreement["text"] for agreement in agreements]
                    raise AgreementNotAcceptedError(agreements) from e
                elif code == 2:
                    raise NotLoggedInAgreementError from e
                else:
                    try:
                        jwt.decode(self.token, options={"verify_signature": False})
                    except Exception:
>                       raise InvalidTokenException
E                       deeplake.util.exceptions.InvalidTokenException: Token is invalid. Make sure the full token string is included and try again.

deeplake/client/client.py:220: InvalidTokenException
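
The chain above is worth unpacking: the backend returned 403 (AuthorizationException), the handler then tried to sanity-check the token with jwt.decode(self.token, ...), hit an AttributeError because the client object has no token attribute, and the broad except turned that into InvalidTokenException, masking the real permission error. A hedged sketch of a more defensive version of that branch (attribute and exception names are taken from the traceback; classify_auth_failure itself is hypothetical):

    import jwt  # PyJWT

    from deeplake.util.exceptions import InvalidTokenException

    def classify_auth_failure(client, original_exc):
        """Decide whether a 403 stems from a malformed token or from missing permissions."""
        token = getattr(client, "token", None)  # the traceback shows `self.token` may not exist
        if token is None:
            # No token to inspect: keep the original authorization error instead of masking it.
            raise original_exc
        try:
            # Decode without verifying the signature, purely to check the string is well-formed JWT
            # (mirrors the broad exception handling in the traceback above).
            jwt.decode(token, options={"verify_signature": False})
        except Exception:
            raise InvalidTokenException from original_exc
        # The token parses, so the 403 is a genuine permission problem.
        raise original_exc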

Check failure on line 156 in deeplake/api/tests/test_views.py

github-actions / JUnit Test Report

test_views.test_save_view_ignore_errors

AssertionError: assert 7 == 8
 +  where 7 = len(Dataset(path='./hub_pytest/test_views/test_save_view_ignore_errors/.queries/two', read_only=True, tensors=['images', 'labels']))
Raw output
local_ds = Dataset(path='./hub_pytest/test_views/test_save_view_ignore_errors', tensors=['images', 'labels'])

    @pytest.mark.slow
    def test_save_view_ignore_errors(local_ds):
        with local_ds as ds:
            ds.create_tensor(
                "images", htype="link[image]", sample_compression="jpg", verify=False
            )
            ds.create_tensor("labels", htype="class_label")
    
            ds.images.extend(
                [deeplake.link("https://picsum.photos/20/30") for _ in range(8)]
            )
            ds.images.extend([deeplake.link("https://abcd/20") for _ in range(2)])
            ds.images.extend(
                [deeplake.link("https://picsum.photos/20/30") for _ in range(10)]
            )
    
            ds.labels.extend([0 for _ in range(20)])
    
            ds.commit()
    
        with pytest.raises(TransformError):
            ds[:10].save_view(id="one", optimize=True, num_workers=2)
    
        ds[:10].save_view(id="two", optimize=True, ignore_errors=True, num_workers=2)
        view = ds.load_view("two")
    
>       assert len(view) == 8
E       AssertionError: assert 7 == 8
E        +  where 7 = len(Dataset(path='./hub_pytest/test_views/test_save_view_ignore_errors/.queries/two', read_only=True, tensors=['images', 'labels']))

deeplake/api/tests/test_views.py:156: AssertionError

Check failure on line 156 in deeplake/api/tests/test_views.py

github-actions / JUnit Test Report

test_views.test_save_view_ignore_errors

AssertionError: assert 7 == 8
 +  where 7 = len(Dataset(path='./hub_pytest/test_views/test_save_view_ignore_errors/.queries/two', read_only=True, tensors=['images', 'labels']))
Raw output
local_ds = Dataset(path='./hub_pytest/test_views/test_save_view_ignore_errors', tensors=['images', 'labels'])

    @pytest.mark.slow
    def test_save_view_ignore_errors(local_ds):
        with local_ds as ds:
            ds.create_tensor(
                "images", htype="link[image]", sample_compression="jpg", verify=False
            )
            ds.create_tensor("labels", htype="class_label")
    
            ds.images.extend(
                [deeplake.link("https://picsum.photos/20/30") for _ in range(8)]
            )
            ds.images.extend([deeplake.link("https://abcd/20") for _ in range(2)])
            ds.images.extend(
                [deeplake.link("https://picsum.photos/20/30") for _ in range(10)]
            )
    
            ds.labels.extend([0 for _ in range(20)])
    
            ds.commit()
    
        with pytest.raises(TransformError):
            ds[:10].save_view(id="one", optimize=True, num_workers=2)
    
        ds[:10].save_view(id="two", optimize=True, ignore_errors=True, num_workers=2)
        view = ds.load_view("two")
    
>       assert len(view) == 8
E       AssertionError: assert 7 == 8
E        +  where 7 = len(Dataset(path='./hub_pytest/test_views/test_save_view_ignore_errors/.queries/two', read_only=True, tensors=['images', 'labels']))

deeplake/api/tests/test_views.py:156: AssertionError
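
Both copies of this failure come from the same root cause: save_view(..., ignore_errors=True) silently drops any sample whose linked image cannot be fetched, and one of the eight picsum.photos links in the first ten samples evidently failed transiently, leaving 7 rather than 8 materialized samples. A minimal sketch of a less flaky variant of the check, assuming the same local_ds fixture and that picsum availability can vary in CI (test_save_view_ignore_errors_tolerant is hypothetical):

    import deeplake
    import pytest

    @pytest.mark.slow
    def test_save_view_ignore_errors_tolerant(local_ds):
        with local_ds as ds:
            ds.create_tensor(
                "images", htype="link[image]", sample_compression="jpg", verify=False
            )
            ds.create_tensor("labels", htype="class_label")
            ds.images.extend([deeplake.link("https://picsum.photos/20/30") for _ in range(8)])
            ds.images.extend([deeplake.link("https://abcd/20") for _ in range(2)])
            ds.labels.extend([0 for _ in range(10)])
            ds.commit()

        ds[:10].save_view(id="two", optimize=True, ignore_errors=True, num_workers=2)
        view = ds.load_view("two")

        # The two https://abcd/20 links are always dropped; picsum links may also fail
        # transiently, so do not require exactly 8 surviving samples.
        assert len(view) <= 8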