Skip to content

Commit

Permalink
View updates (#2259)
Browse files Browse the repository at this point in the history
* mp updates

* show commit id

* fix

* fix

* fmt
  • Loading branch information
farizrahman4u authored Mar 28, 2023
1 parent aedeb46 commit baeeee4
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 22 deletions.
19 changes: 19 additions & 0 deletions deeplake/api/tests/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,3 +120,22 @@ def test_vds_read_only(hub_cloud_path, hub_cloud_dev_token):

assert view.base_storage.read_only == True
assert view._vds.base_storage.read_only == True


# Checks that a view saved before a later commit still reports the earlier
# commit id when it is loaded, that loading it leaves the dataset's own
# commit id unchanged, and that re-saving it with optimize=True yields a view
# whose is_optimized flag is set.
def test_view_from_different_commit(local_ds):
with local_ds as ds:
ds.create_tensor("x")
ds.x.extend(list(range(10)))
cid = ds.commit()
# Save a slice of the dataset as a view under an explicit id.
view = ds[4:9]
view.save_view(id="abcd")
# Add more samples and commit again, so the dataset's commit id becomes cid2.
ds.x.extend(list(range(10, 20)))
cid2 = ds.commit()
view2 = ds.load_view("abcd")
# The loaded view reports the first commit; the dataset still reports cid2.
assert view2.commit_id == cid
assert ds.commit_id == cid2
# The view was saved without optimize=True, so it is not optimized.
assert not view2.is_optimized
# Re-save the loaded view with optimize=True and load the new view.
view2.save_view(id="efg", optimize=True)
view3 = ds.load_view("efg")
assert ds.commit_id == cid2
assert view3.is_optimized
40 changes: 19 additions & 21 deletions deeplake/core/dataset/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3077,6 +3077,7 @@ def _get_view(self, inherit_creds=True, creds: Optional[Dict] = None):
)

ds.index = Index()
ds.version_state = ds.version_state.copy()
ds._checkout(commit_id, verbose=False)
first_index_subscriptable = self.info.get("first-index-subscriptable", True)
if first_index_subscriptable:
Expand Down Expand Up @@ -3117,21 +3118,17 @@ def get_views(self, commit_id: Optional[str] = None) -> List[ViewEntry]:
Args:
commit_id (str, optional): - Commit from which views should be returned.
- If not specified, views from current commit is returned.
- If not specified, views from the currently checked out commit will be returned.
- If not specified, views from all commits are returned.
Returns:
List[ViewEntry]: List of :class:`ViewEntry` instances.
"""
commit_id = commit_id or self.commit_id
queries = self._read_queries_json()
f = lambda x: x["source-dataset-version"] == commit_id
ret = map(
partial(ViewEntry, dataset=self),
filter(f, queries),
)

return list(ret)
if commit_id is not None:
queries = filter(
lambda x: x["source-dataset-version"] == commit_id, queries
)
return list(map(partial(ViewEntry, dataset=self), queries))

def get_view(self, id: str) -> ViewEntry:
"""Returns the dataset view corresponding to ``id``.
Expand Down Expand Up @@ -3189,18 +3186,15 @@ def load_view(
Raises:
KeyError: if view with given id does not exist.
"""
view = self.get_view(id)
if optimize:
return (
self.get_view(id)
.optimize(
tensors=tensors,
num_workers=num_workers,
scheduler=scheduler,
progressbar=progressbar,
)
.load()
)
return self.get_view(id).load()
return view.optimize(
tensors=tensors,
num_workers=num_workers,
scheduler=scheduler,
progressbar=progressbar,
).load()
return view.load()

def delete_view(self, id: str):
"""Deletes the view with given view id.
Expand Down Expand Up @@ -3887,6 +3881,10 @@ def is_view(self) -> bool:
or hasattr(self, "_view_entry")
)

@property
def is_optimized(self) -> bool:
    """Returns ``True`` if this dataset is a view whose entry is not virtual.

    Returns ``False`` when the object has no ``_view_entry`` attribute (or it
    is ``None``). An entry without a ``virtual`` attribute is treated as
    virtual, so the result is ``False`` in that case as well.
    """
    entry = getattr(self, "_view_entry", None)
    # Defaulting ``virtual`` to True makes "no entry" and "entry without the
    # flag" both report as not optimized.
    return not getattr(entry, "virtual", True)

@property
def min_view(self):
"""Returns a view of the dataset in which all tensors are sliced to have the same length as
Expand Down
8 changes: 7 additions & 1 deletion deeplake/core/dataset/view_entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,12 @@ def message(self) -> str:
"""Returns the message with which the view was saved."""
return self.info.get("message", "")

@property
def commit_id(self) -> str:
    """Returns the source dataset version (commit id) recorded for this view.

    Raises:
        KeyError: if the view's info has no ``"source-dataset-version"`` key.
    """
    return self.info["source-dataset-version"]

def __str__(self):
    """Returns a one-line summary of the view: id, message, virtual flag and source commit id."""
    # The earlier return without ``commit_id`` shadowed this one and has been
    # dropped so that the commit id actually appears in the output.
    return (
        f"View(id='{self.id}', message='{self.message}', "
        f"virtual={self.virtual}, commit_id={self.commit_id})"
    )

__repr__ = __str__

Expand All @@ -51,6 +55,8 @@ def load(self, verbose=True):
Returns:
Dataset: Loaded dataset view.
"""
if self.commit_id != self._ds.commit_id:
print(f"Loading view from commit id {self.commit_id}.")
ds = self._ds._sub_ds(
".queries/" + (self.info.get("path") or self.info["id"]),
lock=False,
Expand Down
6 changes: 6 additions & 0 deletions deeplake/core/storage/memory.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Any, Dict
from deeplake.core.storage.lru_cache import _get_nbytes
from deeplake.core.storage.provider import StorageProvider
import os


class MemoryProvider(StorageProvider):
Expand Down Expand Up @@ -124,3 +125,8 @@ def __setstate__(self, state: str):

def get_object_size(self, key: str) -> int:
return _get_nbytes(self[key])

def subdir(self, path: str, read_only: bool = False):
    """Returns a new provider of the same class rooted at ``path`` under this provider's root.

    Args:
        path (str): Path of the sub-directory, joined onto ``self.root``.
        read_only (bool): Value assigned to the new provider's ``read_only`` attribute.

    Returns:
        A new instance of ``self.__class__`` constructed with the joined root.
    """
    sub_root = os.path.join(self.root, path)
    provider = self.__class__(sub_root)
    provider.read_only = read_only
    return provider

0 comments on commit baeeee4

Please sign in to comment.