From baeeee4bcee45489afe9e9989bde019adcce968e Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Tue, 28 Mar 2023 20:58:54 +0400 Subject: [PATCH] View updates (#2259) * mp updates * show commit id * fix * fix * fmt --- deeplake/api/tests/test_views.py | 19 ++++++++++++++ deeplake/core/dataset/dataset.py | 40 ++++++++++++++--------------- deeplake/core/dataset/view_entry.py | 8 +++++- deeplake/core/storage/memory.py | 6 +++++ 4 files changed, 51 insertions(+), 22 deletions(-) diff --git a/deeplake/api/tests/test_views.py b/deeplake/api/tests/test_views.py index af3d65f737..5f2e979e34 100644 --- a/deeplake/api/tests/test_views.py +++ b/deeplake/api/tests/test_views.py @@ -120,3 +120,22 @@ def test_vds_read_only(hub_cloud_path, hub_cloud_dev_token): assert view.base_storage.read_only == True assert view._vds.base_storage.read_only == True + + +def test_view_from_different_commit(local_ds): + with local_ds as ds: + ds.create_tensor("x") + ds.x.extend(list(range(10))) + cid = ds.commit() + view = ds[4:9] + view.save_view(id="abcd") + ds.x.extend(list(range(10, 20))) + cid2 = ds.commit() + view2 = ds.load_view("abcd") + assert view2.commit_id == cid + assert ds.commit_id == cid2 + assert not view2.is_optimized + view2.save_view(id="efg", optimize=True) + view3 = ds.load_view("efg") + assert ds.commit_id == cid2 + assert view3.is_optimized diff --git a/deeplake/core/dataset/dataset.py b/deeplake/core/dataset/dataset.py index cf5bb3b9b2..0dee4cbb81 100644 --- a/deeplake/core/dataset/dataset.py +++ b/deeplake/core/dataset/dataset.py @@ -3077,6 +3077,7 @@ def _get_view(self, inherit_creds=True, creds: Optional[Dict] = None): ) ds.index = Index() + ds.version_state = ds.version_state.copy() ds._checkout(commit_id, verbose=False) first_index_subscriptable = self.info.get("first-index-subscriptable", True) if first_index_subscriptable: @@ -3117,21 +3118,17 @@ def get_views(self, commit_id: Optional[str] = None) -> List[ViewEntry]: Args: commit_id (str, optional): - Commit from which views should be returned. - - If not specified, views from current commit is returned. - - If not specified, views from the currently checked out commit will be returned. + - If not specified, views from all commits are returned. Returns: List[ViewEntry]: List of :class:`ViewEntry` instances. """ - commit_id = commit_id or self.commit_id queries = self._read_queries_json() - f = lambda x: x["source-dataset-version"] == commit_id - ret = map( - partial(ViewEntry, dataset=self), - filter(f, queries), - ) - - return list(ret) + if commit_id is not None: + queries = filter( + lambda x: x["source-dataset-version"] == commit_id, queries + ) + return list(map(partial(ViewEntry, dataset=self), queries)) def get_view(self, id: str) -> ViewEntry: """Returns the dataset view corresponding to ``id``. @@ -3189,18 +3186,15 @@ def load_view( Raises: KeyError: if view with given id does not exist. """ + view = self.get_view(id) if optimize: - return ( - self.get_view(id) - .optimize( - tensors=tensors, - num_workers=num_workers, - scheduler=scheduler, - progressbar=progressbar, - ) - .load() - ) - return self.get_view(id).load() + return view.optimize( + tensors=tensors, + num_workers=num_workers, + scheduler=scheduler, + progressbar=progressbar, + ).load() + return view.load() def delete_view(self, id: str): """Deletes the view with given view id. @@ -3887,6 +3881,10 @@ def is_view(self) -> bool: or hasattr(self, "_view_entry") ) + @property + def is_optimized(self) -> bool: + return not getattr(getattr(self, "_view_entry", None), "virtual", True) + @property def min_view(self): """Returns a view of the dataset in which all tensors are sliced to have the same length as diff --git a/deeplake/core/dataset/view_entry.py b/deeplake/core/dataset/view_entry.py index b3004f165e..1be02b5cce 100644 --- a/deeplake/core/dataset/view_entry.py +++ b/deeplake/core/dataset/view_entry.py @@ -33,8 +33,12 @@ def message(self) -> str: """Returns the message with which the view was saved.""" return self.info.get("message", "") + @property + def commit_id(self) -> str: + return self.info["source-dataset-version"] + def __str__(self): - return f"View(id='{self.id}', message='{self.message}', virtual={self.virtual})" + return f"View(id='{self.id}', message='{self.message}', virtual={self.virtual}, commit_id={self.commit_id})" __repr__ = __str__ @@ -51,6 +55,8 @@ def load(self, verbose=True): Returns: Dataset: Loaded dataset view. """ + if self.commit_id != self._ds.commit_id: + print(f"Loading view from commit id {self.commit_id}.") ds = self._ds._sub_ds( ".queries/" + (self.info.get("path") or self.info["id"]), lock=False, diff --git a/deeplake/core/storage/memory.py b/deeplake/core/storage/memory.py index ef7436d714..f15cb1f690 100644 --- a/deeplake/core/storage/memory.py +++ b/deeplake/core/storage/memory.py @@ -1,6 +1,7 @@ from typing import Any, Dict from deeplake.core.storage.lru_cache import _get_nbytes from deeplake.core.storage.provider import StorageProvider +import os class MemoryProvider(StorageProvider): @@ -124,3 +125,8 @@ def __setstate__(self, state: str): def get_object_size(self, key: str) -> int: return _get_nbytes(self[key]) + + def subdir(self, path: str, read_only: bool = False): + sd = self.__class__(os.path.join(self.root, path)) + sd.read_only = read_only + return sd