Skip to content

Commit

Permalink
BUG: make hashtable.unique support read-only arrays
Browse files Browse the repository at this point in the history
This problem was brought up in
pandas-dev#18773 and effectively comes
down to how Cython deals with read-only arrays. While it would be ideal
for Cython to fix the underlying problem, in the meantime we can rely on
this workaround.
  • Loading branch information
hexgnu committed Dec 18, 2017
1 parent b5f1e71 commit 29fcb41
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 41 deletions.
95 changes: 54 additions & 41 deletions pandas/_libs/hashtable_class_helper.pxi.in
Expand Up @@ -255,10 +255,56 @@ dtypes = [('Float64', 'float64', 'val != val', True),
('UInt64', 'uint64', 'False', False),
('Int64', 'int64', 'val == iNaT', False)]

def get_dispatch(dtypes):
    """Yield per-dtype template variables, including the body of ``unique``.

    Parameters
    ----------
    dtypes : iterable of (name, dtype, null_condition, float_group) tuples
        One entry per hashtable flavour to generate.

    Yields
    ------
    tuple
        ``(name, dtype, null_condition, float_group, unique_template)``
        where ``unique_template`` is Cython source text with ``{name}``,
        ``{dtype}`` and ``{float_group}`` already substituted.
    """
    for (name, dtype, null_condition, float_group) in dtypes:
        # The same body is shared by ``unique`` and ``unique_memview``, so it
        # is kept as text and pasted into both methods by the outer template.
        # NOTE(review): the 8-space base indent assumes the placeholder is
        # expanded at column 0 inside a method of a cdef class -- confirm
        # against the ``{{unique_template}}`` call sites.
        unique_template = """\
        cdef:
            Py_ssize_t i, n = len(values)
            int ret = 0
            {dtype}_t val
            khiter_t k
            bint seen_na = 0
            {name}Vector uniques = {name}Vector()
            {name}VectorData *ud

        ud = uniques.data

        with nogil:
            for i in range(n):
                val = values[i]
                IF {float_group}:
                    if val == val:
                        k = kh_get_{dtype}(self.table, val)
                        if k == self.table.n_buckets:
                            kh_put_{dtype}(self.table, val, &ret)
                            if needs_resize(ud):
                                with gil:
                                    uniques.resize()
                            append_data_{dtype}(ud, val)
                    elif not seen_na:
                        seen_na = 1
                        if needs_resize(ud):
                            with gil:
                                uniques.resize()
                        append_data_{dtype}(ud, NAN)
                ELSE:
                    k = kh_get_{dtype}(self.table, val)
                    if k == self.table.n_buckets:
                        kh_put_{dtype}(self.table, val, &ret)
                        if needs_resize(ud):
                            with gil:
                                uniques.resize()
                        append_data_{dtype}(ud, val)
        return uniques.to_array()
        """

        # ``null_condition`` has no placeholder in the template; str.format
        # ignores unused keyword arguments, so passing it is harmless.
        unique_template = unique_template.format(
            name=name,
            dtype=dtype,
            null_condition=null_condition,
            float_group=float_group,
        )

        yield (name, dtype, null_condition, float_group, unique_template)
}}


{{for name, dtype, null_condition, float_group in dtypes}}
{{for name, dtype, null_condition, float_group, unique_template in get_dispatch(dtypes)}}

cdef class {{name}}HashTable(HashTable):

Expand Down Expand Up @@ -450,48 +496,15 @@ cdef class {{name}}HashTable(HashTable):
return np.asarray(labels), arr_uniques

@cython.boundscheck(False)
def unique(self, {{dtype}}_t[:] values):
cdef:
Py_ssize_t i, n = len(values)
int ret = 0
{{dtype}}_t val
khiter_t k
bint seen_na = 0
{{name}}Vector uniques = {{name}}Vector()
{{name}}VectorData *ud
def unique(self, ndarray[{{dtype}}_t, ndim=1] values):
if values.flags.writeable:
return self.unique_memview(values)

ud = uniques.data
{{unique_template}}

with nogil:
for i in range(n):
val = values[i]

{{if float_group}}
if val == val:
k = kh_get_{{dtype}}(self.table, val)
if k == self.table.n_buckets:
kh_put_{{dtype}}(self.table, val, &ret)
if needs_resize(ud):
with gil:
uniques.resize()
append_data_{{dtype}}(ud, val)
elif not seen_na:
seen_na = 1
if needs_resize(ud):
with gil:
uniques.resize()
append_data_{{dtype}}(ud, NAN)
{{else}}
k = kh_get_{{dtype}}(self.table, val)
if k == self.table.n_buckets:
kh_put_{{dtype}}(self.table, val, &ret)
if needs_resize(ud):
with gil:
uniques.resize()
append_data_{{dtype}}(ud, val)
{{endif}}

return uniques.to_array()
@cython.boundscheck(False)
def unique_memview(self, {{dtype}}_t[:] values):
{{unique_template}}

{{endfor}}

Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/reshape/test_tile.py
Expand Up @@ -512,6 +512,16 @@ def f():
tm.assert_numpy_array_equal(
mask, np.array([False, True, True, True, True]))

def test_cut_read_only(self):
    """Check that ``cut`` gives the same result for read-only and writeable bins."""
    # Two bin arrays with identical contents; only the writeable flag differs.
    readonly = np.arange(0, 100, 10)
    readonly.flags.writeable = False

    mutable = np.arange(0, 100, 10)

    one_to_hundred = np.arange(100)
    # The open parenthesis already continues the line; no backslash needed.
    tm.assert_categorical_equal(cut(one_to_hundred, readonly),
                                cut(one_to_hundred, mutable))


def curpath():
pth, _ = os.path.split(os.path.abspath(__file__))
Expand Down

0 comments on commit 29fcb41

Please sign in to comment.