Skip to content

Commit

Permalink
Allow RLE for bools in v1 pages (#885)
Browse files (browse the repository at this point in the history)
  • Loading branch information
martindurant committed Sep 28, 2023
1 parent 09b78b5 commit df43dac
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions fastparquet/core.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -126,8 +126,7 @@ def read_data_page(f, helper, header, metadata, skip_nulls=False,
nval = daph.num_values - num_nulls
se = helper.schema_element(metadata.path_in_schema)
if daph.encoding == parquet_thrift.Encoding.PLAIN:
-
- width = helper.schema_element(metadata.path_in_schema).type_length
+ width = se.type_length
values = read_plain(io_obj.read(),
metadata.type,
int(daph.num_values - num_nulls),
Expand All @@ -137,7 +136,9 @@ def read_data_page(f, helper, header, metadata, skip_nulls=False,
parquet_thrift.Encoding.RLE_DICTIONARY,
parquet_thrift.Encoding.RLE]:
# bit_width is stored as single byte.
- if daph.encoding == parquet_thrift.Encoding.RLE:
+ if metadata.type == parquet_thrift.Type.BOOLEAN:
+ bit_width = 1
+ elif daph.encoding == parquet_thrift.Encoding.RLE:
bit_width = se.type_length
else:
bit_width = io_obj.read_byte()
Expand Down

0 comments on commit df43dac

Please sign in to comment.