Skip to content

Commit

Permalink
fix: binary format version bump (#1414)
Browse files Browse the repository at this point in the history
Signed-off-by: Yanlong Wang <yanlong.wang@naiver.org>
  • Loading branch information
nomagick committed Apr 20, 2023
1 parent f67508a commit bf2e50c
Showing 1 changed file with 17 additions and 3 deletions.
20 changes: 17 additions & 3 deletions docarray/array/doc_list/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,11 +546,11 @@ def to_dataframe(self) -> 'pd.DataFrame':
def _stream_header(self) -> bytes:
# Binary format for streaming case

# V1 DocList streaming serialization format
# | 1 byte | 8 bytes | 4 bytes | variable | 4 bytes | variable ...
# V2 DocList streaming serialization format
# | 1 byte | 8 bytes | 4 bytes | variable(docarray v2) | 4 bytes | variable(docarray v2) ...

# 1 byte (uint8)
version_byte = b'\x01'
version_byte = b'\x02'
# 8 bytes (uint64)
num_docs_as_bytes = len(self).to_bytes(8, 'big', signed=False)
return version_byte + num_docs_as_bytes
Expand Down Expand Up @@ -597,6 +597,12 @@ def _load_binary_all(
from docarray.utils._internal.progress_bar import _get_progressbar

# 1 byte (uint8)
version_num = int.from_bytes(d[0:1], 'big', signed=False)
if version_num != 2:
raise ValueError(
f'Unsupported version number {version_num} in binary format, expected 2'
)

# 8 bytes (uint64)
num_docs = int.from_bytes(d[1:9], 'big', signed=False)

Expand Down Expand Up @@ -655,6 +661,14 @@ def _load_binary_stream(
with file_ctx as f:
version_numdocs_lendoc0 = f.read(9)
# 1 byte (uint8)
version_num = int.from_bytes(
version_numdocs_lendoc0[0:1], 'big', signed=False
)
if version_num != 2:
raise ValueError(
f'Unsupported version number {version_num} in binary format, expected 2'
)

# 8 bytes (uint64)
num_docs = int.from_bytes(version_numdocs_lendoc0[1:9], 'big', signed=False)

Expand Down

0 comments on commit bf2e50c

Please sign in to comment.