Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AL-1443] Tiling + Slicing + Updating #1374

Merged
merged 48 commits into from
Dec 20, 2021
Merged
Show file tree
Hide file tree
Changes from 31 commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
5955529
lazy slicing
farizrahman4u Dec 5, 2021
c4d3c53
lazy slice+updating
farizrahman4u Dec 5, 2021
7ee8375
fixes
farizrahman4u Dec 6, 2021
2860ac7
Merge branch 'main' of https://www.github.com/activeloopai/hub into f…
farizrahman4u Dec 7, 2021
09d9e41
vc fix
farizrahman4u Dec 7, 2021
6f2b738
tile in tests
farizrahman4u Dec 7, 2021
c250e14
format
farizrahman4u Dec 7, 2021
68cdc71
format
farizrahman4u Dec 7, 2021
c12663d
fix
farizrahman4u Dec 7, 2021
dcced09
cleanup
farizrahman4u Dec 7, 2021
bc1fbb7
mypy
farizrahman4u Dec 8, 2021
4c45d34
Merge branch 'main' of https://www.github.com/activeloopai/hub into f…
farizrahman4u Dec 15, 2021
327ede1
chunk replace
farizrahman4u Dec 15, 2021
3361433
format
farizrahman4u Dec 15, 2021
c2e29a0
merge main
farizrahman4u Dec 15, 2021
e55acb4
cov
farizrahman4u Dec 15, 2021
52a8335
mypy
farizrahman4u Dec 15, 2021
36cdd3b
more cov
farizrahman4u Dec 15, 2021
f367759
more cov
farizrahman4u Dec 15, 2021
97015bd
more cov
farizrahman4u Dec 15, 2021
be36760
neg indexing
farizrahman4u Dec 15, 2021
96c084c
generalize neg idxs to ndim
farizrahman4u Dec 15, 2021
cedb7a6
Merge branch 'main' of https://www.github.com/activeloopai/hub into f…
farizrahman4u Dec 15, 2021
1e4ecae
fix updates
farizrahman4u Dec 15, 2021
923a977
mypy
farizrahman4u Dec 15, 2021
59e4f9d
darg
farizrahman4u Dec 15, 2021
5219a4a
form
farizrahman4u Dec 15, 2021
c56b49d
form
farizrahman4u Dec 15, 2021
294ade7
mypy
farizrahman4u Dec 15, 2021
e769274
read only fix
farizrahman4u Dec 16, 2021
f65f95f
smol fix
farizrahman4u Dec 16, 2021
739ffaa
merge main
farizrahman4u Dec 17, 2021
34af099
nits
farizrahman4u Dec 17, 2021
9ad7a66
fix commit diff
farizrahman4u Dec 17, 2021
f88e5d2
smol fix
farizrahman4u Dec 17, 2021
5eb91e3
Merge branch 'main' of https://www.github.com/activeloopai/hub into f…
farizrahman4u Dec 17, 2021
594f153
Fix tiling + version control (#1398)
AbhinavTuli Dec 17, 2021
aca3c32
remove unused var
farizrahman4u Dec 17, 2021
c2d59a2
Merge branch 'fr_tiling_2' of https://www.github.com/activeloopai/hub…
farizrahman4u Dec 17, 2021
8d448a0
windows issue fix
AbhinavTuli Dec 18, 2021
94223ac
load compute after checks
AbhinavTuli Dec 18, 2021
cc1bba5
move autoflush restore into finally
AbhinavTuli Dec 18, 2021
c6c62e6
reduce test size for transform
AbhinavTuli Dec 18, 2021
2ad9889
lint fix
AbhinavTuli Dec 18, 2021
09c005b
infer sub sample shape from tile shapes
farizrahman4u Dec 20, 2021
434078a
Merge branch 'fix/windows-overflow' of https://www.github.com/activel…
farizrahman4u Dec 20, 2021
3b69734
Update dataset.py
farizrahman4u Dec 20, 2021
38c2207
Update __init__.py
farizrahman4u Dec 20, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 18 additions & 3 deletions hub/api/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,10 +117,25 @@ def test_populate_dataset(ds):

def test_larger_data_memory(memory_ds):
memory_ds.create_tensor("image")
memory_ds.image.extend(np.ones((4, 4096, 4096)))
x = np.ones((4, 4096, 4096))
memory_ds.image.extend(x)
assert len(memory_ds) == 4
assert memory_ds.image.shape == (4, 4096, 4096)
np.testing.assert_array_equal(memory_ds.image.numpy(), np.ones((4, 4096, 4096)))
assert memory_ds.image.shape == x.shape
np.testing.assert_array_equal(memory_ds.image.numpy(), x)
idxs = [
0,
1,
3,
-1,
slice(0, 3),
slice(2, 4),
slice(2, None),
(0, slice(5, None), slice(None, 714)),
(2, 100, 1007),
(slice(1, 3), [20, 1000, 2, 400], [-2, 3, 577, 4095]),
]
for idx in idxs:
np.testing.assert_array_equal(memory_ds.image[idx].numpy(), x[idx])


def test_stringify(memory_ds):
Expand Down
45 changes: 40 additions & 5 deletions hub/api/tests/test_api_tiling.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,18 @@ def test_mixed_small_large(local_ds_generator, compression):
ds = local_ds_generator()
arr1 = np.random.randint(0, 255, (3003, 2001, 3)).astype(np.uint8)
arr2 = np.random.randint(0, 255, (500, 500, 3)).astype(np.uint8)
arr3 = np.random.randint(0, 255, (2503, 2501, 3)).astype(np.uint8)
arr3 = np.random.randint(0, 255, (250, 250, 3)).astype(np.uint8)

idxs = [
(slice(73, 117), slice(9, 17)),
4,
-1,
slice(
10,
),
slice(20, 37),
]

with ds:
ds.create_tensor("abc", **compression)
for i in range(10):
Expand All @@ -37,15 +48,24 @@ def test_mixed_small_large(local_ds_generator, compression):
for i in range(10):
if i % 5 == 0:
np.testing.assert_array_equal(ds.abc[i].numpy(), arr1)
for idx in idxs:
np.testing.assert_array_equal(ds.abc[i][idx].numpy(), arr1[idx])
else:
np.testing.assert_array_equal(ds.abc[i].numpy(), arr2)
for idx in idxs:
np.testing.assert_array_equal(ds.abc[i][idx].numpy(), arr2[idx])

ds = local_ds_generator()

for i in range(10):
if i % 5 == 0:
np.testing.assert_array_equal(ds.abc[i].numpy(), arr1)
for idx in idxs:
np.testing.assert_array_equal(ds.abc[i][idx].numpy(), arr1[idx])
else:
np.testing.assert_array_equal(ds.abc[i].numpy(), arr2)
for idx in idxs:
np.testing.assert_array_equal(ds.abc[i][idx].numpy(), arr2[idx])

with ds:
ds.abc.extend([arr3] * 3)
Expand Down Expand Up @@ -74,27 +94,42 @@ def test_updates(memory_ds, compression):
arr2 = np.random.randint(0, 255, (500, 500, 3)).astype(np.uint8)
arr3 = np.random.randint(0, 255, (2503, 2501, 3)).astype(np.uint8)
arr4 = np.random.randint(0, 255, (250, 250, 3)).astype(np.uint8)

update_idx = (slice(73, 117), slice(1765, 1901))

arr5 = arr1 * 2
arr6 = arr5[update_idx]
arr6 += 1

with memory_ds:
memory_ds.create_tensor("abc", **compression)
memory_ds.create_tensor("abc", max_chunk_size=2 ** 21, **compression)
for i in range(10):
if i % 5 == 0:
memory_ds.abc.append(arr1)
else:
memory_ds.abc.append(arr2)

len(memory_ds)
with memory_ds:
for i in range(10):
if i % 5 != 0:
if i % 5 == 0:
memory_ds.abc[i] = arr1 * 2
memory_ds.abc[i][update_idx] = arr6
else:
memory_ds.abc[i] = arr3 if i % 2 == 0 else arr4

for i in range(10):
if i % 5 == 0:
np.testing.assert_array_equal(memory_ds.abc[i].numpy(), arr1)
np.testing.assert_array_equal(memory_ds.abc[i].numpy(), arr5)
elif i % 2 == 0:
np.testing.assert_array_equal(memory_ds.abc[i].numpy(), arr3)
else:
np.testing.assert_array_equal(memory_ds.abc[i].numpy(), arr4)

# update tiled sample with small sample
arr7 = np.random.randint(0, 255, (3, 2, 3)).astype(np.uint8)
memory_ds.abc[0] = arr7
np.testing.assert_array_equal(memory_ds.abc[0].numpy(), arr7)


def test_cachable_overflow(memory_ds):
ds = memory_ds
Expand Down
12 changes: 8 additions & 4 deletions hub/core/chunk/base_chunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ def __init__(
self._decompressed_samples: Optional[List[np.ndarray]] = None
self._decompressed_bytes: Optional[bytes] = None

# Whether tensor meta is updated by chunk. Used by chunk engine.
self._update_meta: bool = True

@property
def num_data_bytes(self) -> int:
return len(self.data_bytes)
Expand Down Expand Up @@ -127,7 +130,7 @@ def frombuffer(cls, buffer: bytes, chunk_args: list, copy=True): # type: ignore
return chunk

@abstractmethod
def extend_if_has_space(self, incoming_samples) -> float:
def extend_if_has_space(self, incoming_samples, update_meta: bool = True) -> float:
"""Extends the chunk with the incoming samples."""

@abstractmethod
Expand Down Expand Up @@ -242,8 +245,9 @@ def copy(self, chunk_args=None):
def register_in_meta_and_headers(self, sample_nbytes: Optional[int], shape):
"""Registers a new sample in meta and headers"""
self.register_sample_to_headers(sample_nbytes, shape)
self.tensor_meta.length += 1
self.tensor_meta.update_shape_interval(shape)
if self._update_meta:
self.tensor_meta.length += 1
self.tensor_meta.update_shape_interval(shape)

def update_in_meta_and_headers(
self, local_index: int, sample_nbytes: Optional[int], shape
Expand Down Expand Up @@ -286,7 +290,7 @@ def write_tile(self, sample: SampleTiles, skip_bytes=False):
data, tile_shape = sample.yield_tile()
sample_nbytes = None if skip_bytes else len(data)
self.data_bytes = data
update_meta = sample.is_first_write
update_meta = self._update_meta and sample.is_first_write
self.register_sample_to_headers(sample_nbytes, tile_shape)
if update_meta:
self.tensor_meta.length += 1
Expand Down
2 changes: 1 addition & 1 deletion hub/core/chunk/chunk_compressed_chunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@


class ChunkCompressedChunk(BaseChunk):
def extend_if_has_space(self, incoming_samples: List[InputSample]) -> float:
def extend_if_has_space(self, incoming_samples: List[InputSample]) -> float: # type: ignore
self.prepare_for_write()
if self.is_byte_compression:
return self.extend_if_has_space_byte_compression(incoming_samples)
Expand Down
2 changes: 1 addition & 1 deletion hub/core/chunk/sample_compressed_chunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@


class SampleCompressedChunk(BaseChunk):
def extend_if_has_space(self, incoming_samples: List[InputSample]) -> float:
def extend_if_has_space(self, incoming_samples: List[InputSample]) -> float: # type: ignore
self.prepare_for_write()
num_samples: float = 0
dtype = self.dtype if self.is_byte_compression else None
Expand Down
2 changes: 1 addition & 1 deletion hub/core/chunk/uncompressed_chunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@


class UncompressedChunk(BaseChunk):
def extend_if_has_space(
def extend_if_has_space( # type: ignore
self, incoming_samples: Union[List[InputSample], np.ndarray]
) -> float:
self.prepare_for_write()
Expand Down
Loading