-
Notifications
You must be signed in to change notification settings - Fork 602
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1066 from activeloopai/task/udpate-meta-api
dataset/tensor `info` alongside `meta`
- Loading branch information
Showing
18 changed files
with
429 additions
and
35 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
from hub.core.storage.lru_cache import LRUCache | ||
from typing import Any, Dict | ||
from hub.core.storage.cachable import CachableCallback, use_callback | ||
|
||
|
||
class Info(CachableCallback):
    """Contains **optional** key/values that datasets/tensors use for human-readability.

    See the `Meta` class for required key/values for datasets/tensors.

    Values can be read with item syntax (`info["description"]`) or, when the key
    does not collide with a real attribute or method of this object, with
    attribute syntax (`info.description`).

    Note:
        Since `Info` is rarely written to and mostly by the user, every modifier will call `cache[key] = self`.
        Must call `initialize_callback_location` before using any methods.
    """

    def __init__(self):
        """Create an empty `Info`."""

        # `_info` is assigned before the parent initializer runs so that any
        # attribute lookup falling through to `__getattr__` can already find it.
        self._info = {}
        super().__init__()

    @property
    def nbytes(self):
        """Size in bytes of the serialized form returned by `tobytes()`."""

        # TODO: optimize this
        return len(self.tobytes())

    @use_callback(check_only=True)
    def __len__(self):
        """Return the number of stored keys."""

        return len(self._info)

    @use_callback(check_only=True)
    def __getstate__(self) -> Dict[str, Any]:
        """Return the underlying dictionary (the same object, not a copy)."""

        return self._info

    def __setstate__(self, state: Dict[str, Any]):
        """Replace the underlying dictionary with `state`."""

        self._info = state

    @use_callback()
    def update(self, *args, **kwargs):
        """Store optional dataset/tensor information. Will be accessible after loading your data from a new script!

        Inputs must be supported by JSON.

        Note:
            This method has the same functionality as `dict().update(...)`.
            Reference: https://docs.python.org/3/library/stdtypes.html#dict.update.
            A full list of supported value types can be found here: https://docs.python.org/3/library/json.html#json.JSONEncoder.

        Examples:
            Normal update usage:
                >>> ds.info
                {}
                >>> ds.info.update(key=0)
                >>> ds.info
                {"key": 0}
                >>> ds.info.update({"key1": 5, "key2": [1, 2, "test"]})
                >>> ds.info
                {"key": 0, "key1": 5, "key2": [1, 2, "test"]}

            Alternate update usage:
                >>> ds.info
                {}
                >>> ds.info.update(list=[1, 2, "apple"])
                >>> ds.info
                {"list": [1, 2, "apple"]}
                >>> l = ds.info.list
                >>> l
                [1, 2, "apple"]
                >>> l.append(5)
                >>> l
                [1, 2, "apple", 5]
                >>> ds.info.update()  # required to be persistent!
        """

        # Ask the cache to verify it may be written to before mutating anything
        # (presumably raises when the cache is read-only -- confirm in `LRUCache`).
        self._cache.check_readonly()
        self._info.update(*args, **kwargs)

    def __getattr__(self, name: str) -> Any:
        """Allows access to info values using the `.` syntax. Example: `info.description`.

        Python only calls `__getattr__` after normal attribute lookup has failed,
        so stored keys can never shadow real attributes or methods. A key such as
        `"update"` or `"_cache"` stays reachable through `info["update"]`, while
        `info.update` keeps returning the method.

        Raises:
            AttributeError: If `name` is neither a real attribute nor a stored key.
        """

        # `_info` itself is missing (e.g. the instance was created without
        # `__init__`); bail out instead of recursing back into this method.
        if name == "_info":
            raise AttributeError(name)

        if name in self._info:
            return self[name]

        raise AttributeError(
            f"'{type(self).__name__}' object has no attribute '{name}'"
        )

    def __getitem__(self, key: str):
        """Return the value stored under `key` (raises `KeyError` if absent)."""

        return self._info[key]

    def __str__(self):
        """Return the `str` of the underlying dictionary."""

        return self._info.__str__()

    def __repr__(self):
        """Return the `repr` of the underlying dictionary."""

        return self._info.__repr__()
|
||
|
||
# Return the `Info` object associated with `info_key`.
# When `info_key` is already present in `cache`, the entry is fetched with
# `cache.get_cachable(info_key, Info)`; otherwise a new, empty `Info` is created.
def load_info(info_key: str, cache: LRUCache): | ||
if info_key in cache: | ||
info = cache.get_cachable(info_key, Info) | ||
else: | ||
info = Info() | ||
# NOTE(review): indentation was lost in this rendering, so it is unclear whether the
# call below belongs to the `else` branch only or runs after the whole `if/else`
# (i.e. also for an `Info` fetched from the cache) -- confirm against the real file.
info.initialize_callback_location(info_key, cache) | ||
|
||
return info |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
def test_dataset(local_ds_generator):
    """Dataset-level `info` written through `update` is visible on later datasets
    obtained from `local_ds_generator()`."""

    ds = local_ds_generator()
    assert len(ds.info) == 0

    # Writing the same key twice: the later value is the one expected below.
    ds.info.update(my_key=0)
    ds.info.update(my_key=1)

    # Keyword form followed by mapping form for the same key.
    ds.info.update(another_key="hi")
    ds.info.update({"another_key": "hello"})

    # Mapping and keyword supplied together for the same key.
    ds.info.update({"something": "aaaaa"}, something="bbbb")

    ds.info.update(test=[1, 2, "5"])

    # Keep a handle on the stored list so it can be mutated behind `info`'s back.
    shared_list = ds.info.test
    with ds:
        ds.info.update({"test2": (1, 5, (1, "2"), [5, 6, (7, 8)])})
        ds.info.update(xyz="abc")
        shared_list.extend(["user made change without `update`"])

    ds.info.update({"1_-+": 5})

    ds = local_ds_generator()

    assert len(ds.info) == 7

    assert ds.info.another_key == "hello"
    assert ds.info.something == "bbbb"

    # The in-place `extend` is expected to have been saved by the later `update` call.
    assert ds.info.test == [1, 2, "5", "user made change without `update`"]
    # Tuples are expected to come back as lists.
    assert ds.info.test2 == [1, 5, [1, "2"], [5, 6, [7, 8]]]

    assert ds.info.xyz == "abc"
    assert ds.info["1_-+"] == 5  # key can't be accessed with `.` syntax

    ds.info.update(test=[99])

    ds = local_ds_generator()

    assert len(ds.info) == 7
    assert ds.info.test == [99]
|
||
|
||
def test_tensor(local_ds_generator):
    """Each tensor keeps its own `info`, and updates are visible on later datasets
    obtained from `local_ds_generator()`."""

    ds = local_ds_generator()

    first = ds.create_tensor("tensor1")
    second = ds.create_tensor("tensor2")

    # Freshly created tensors start with empty info.
    assert len(first.info) == 0
    assert len(second.info) == 0

    first.info.update(key=0)
    second.info.update(key=1, key1=0)

    ds = local_ds_generator()

    first = ds.tensor1
    second = ds.tensor2

    # The same key name on two tensors holds independent values.
    assert len(first.info) == 1
    assert len(second.info) == 2

    assert first.info.key == 0
    assert second.info.key == 1
    assert second.info.key1 == 0

    # Overwrite an existing key while inside the dataset context manager.
    with ds:
        first.info.update(key=99)

    ds = local_ds_generator()

    first = ds.tensor1
    second = ds.tensor2

    assert len(first.info) == 1
    assert len(second.info) == 2

    assert first.info.key == 99
|
||
|
||
def test_update_reference_manually(local_ds_generator):
    """Right now synchronization can only happen when you call `info.update`."""

    ds = local_ds_generator()

    ds.info.update(key=[1, 2, 3])

    ds = local_ds_generator()

    values = ds.info.key
    assert values == [1, 2, 3]

    # un-registered update: visible on this instance, but expected to be gone
    # on the next dataset because `update` was never called
    values.append(5)
    assert ds.info.key == [1, 2, 3, 5]

    ds = local_ds_generator()

    values = ds.info.key
    assert values == [1, 2, 3]

    # registered update: the bare `update()` call is what makes the in-place
    # mutation persistent
    values.append(99)
    ds.info.update()

    ds = local_ds_generator()

    assert values == [1, 2, 3, 99]
    # Check the reloaded dataset too; asserting only the local list would pass
    # even if nothing had been persisted.
    assert ds.info.key == [1, 2, 3, 99]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.