Skip to content

Commit

Permalink
Allow categorical column with no categories (#888)
Browse files Browse the repository at this point in the history
  • Loading branch information
martindurant committed Oct 10, 2023
1 parent 58cdab6 commit 8e9d419
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 3 deletions.
7 changes: 6 additions & 1 deletion fastparquet/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,12 @@ def read_col(column, schema_helper, infile, use_cat=False,
column.
"""
cmd = column.meta_data
se = schema_helper.schema_element(cmd.path_in_schema)
try:
se = schema_helper.schema_element(cmd.path_in_schema)
except KeyError:
# column not present in this row group
assign[:] = None
return
off = min((cmd.dictionary_page_offset or cmd.data_page_offset,
cmd.data_page_offset))

Expand Down
2 changes: 1 addition & 1 deletion fastparquet/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def set_cats(values, i=i, col=col, **kwargs):
shape[-1] = size

if isinstance(bvalues, Categorical):
code = np.zeros(shape=shape, dtype=bvalues.codes.dtype)
code = np.full(fill_value=-1, shape=shape, dtype=bvalues.codes.dtype)

values = Categorical.from_codes(codes=code, dtype=bvalues.dtype)

Expand Down
9 changes: 8 additions & 1 deletion fastparquet/test/test_dataframe.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import warnings
from unittest import mock

import numpy as np
import pandas as pd
import pytest
from numpy import empty as np_empty
Expand All @@ -26,14 +27,20 @@ def test_empty():
df, views = empty('category', size=n, cols=['c'],
cats={'c': ['one', 'two']})
views['c'][0] = 1
assert df.c[:2].tolist() == ['two', 'one']
assert df.c[:2].tolist() == ['two', np.nan]

df, views = empty('i4,i8,f8,f8,O', size=n,
cols=['i4', 'i8', 'f8_1', 'f8_2', 'O'])
assert df.shape == (n, 5)
assert len(views) == 5


def test_no_cats():
    """A categorical column built with an empty category list has all codes at -1."""
    _, column_views = empty(
        'category',
        size=10,
        cols=['c'],
        cats={'c': []},
    )
    codes = column_views["c"]
    # With no categories to point at, every slot must hold the -1 fill value.
    assert (codes == -1).all()


def test_empty_tz_utc():
with warnings.catch_warnings():
warnings.simplefilter("error")
Expand Down

0 comments on commit 8e9d419

Please sign in to comment.