Skip to content

Commit

Permalink
some comments for @abhinav
Browse files Browse the repository at this point in the history
  • Loading branch information
verbose-void committed May 7, 2021
1 parent 730120d commit 8d95e4a
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 1 deletion.
3 changes: 2 additions & 1 deletion hub/core/chunk_engine/storage_chain.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# TODO: merge cache_chain -> storage_chain. we can merge them to reduce params
# TODO: create class to handle this merge, also the class should be moved from `chunk_engine/` to `storage/`


def write_bytes_with_caching(key, b, cache_chain, storage):
@@ -44,4 +45,4 @@ def flush_cache(cache_chain, storage):
for key in keys:
del cache[key]

- # TODO: test flushing to make surec cache.used_space will return 0
+ # TODO: test flushing to make sure cache.used_space will return 0
5 changes: 5 additions & 0 deletions hub/core/chunk_engine/write.py
Original file line number Diff line number Diff line change
@@ -35,6 +35,8 @@ def chunk_and_write_array(
for i in range(array.shape[0]):
sample = array[i]

# TODO: if compressor is sample-based ONLY, then we should compress `sample` here & `chunk_and_write_bytes` should not compress

# TODO: this can be replaced with hilbert curve or something
b = array_to_bytes(sample)
start_chunk, end_chunk = chunk_and_write_bytes(
@@ -94,8 +96,11 @@ def chunk_and_write_bytes(
# TODO: fill previous chunk if it is incomplete
# TODO: after previous chunk is fully filled, compress

# TODO: add threshold for compressing (in case user specifies like 10gb chunk_size)
if full_chunk:
# only compress if it is a full chunk

# TODO: sample-based chunking
chunk = compressor(chunk)
else:
chunk_name += "_incomplete"
Expand Down

0 comments on commit 8d95e4a

Please sign in to comment.