Skip to content

Commit

Permalink
Allow for changes in dictionary while reading a row-group column (#367)
Browse files: browse the repository at this point in the history
  • Loading branch information
martindurant committed Nov 29, 2018
1 parent 27d506d commit f59dd66
Showing 1 changed file with 9 additions and 0 deletions.
9 changes: 9 additions & 0 deletions fastparquet/core.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -227,6 +227,15 @@ def read_col(column, schema_helper, infile, use_cat=False,
num = 0
row_idx = 0
while True:
if ph.type == parquet_thrift.PageType.DICTIONARY_PAGE:
dic2 = np.array(read_dictionary_page(infile, schema_helper, ph, cmd))
dic2 = convert(dic2, se)
if use_cat and (dic2 != dic).any():
raise RuntimeError("Attempt to read as categorical a column"
"with multiple dictionary pages.")
dic = dic2
ph = read_thrift(infile, parquet_thrift.PageHeader)
continue
if (selfmade and hasattr(cmd, 'statistics') and
getattr(cmd.statistics, 'null_count', 1) == 0):
skip_nulls = True
Expand Down

0 comments on commit f59dd66

Please sign in to comment.