BUG: Series.combine() fails with ExtensionArray inside of Series (pan…
Dr-Irv authored and daminisatya committed Jun 8, 2018
1 parent 7471f3e commit 1027a1c
Showing 9 changed files with 135 additions and 6 deletions.
9 changes: 9 additions & 0 deletions doc/source/whatsnew/v0.24.0.txt
@@ -181,9 +181,18 @@ Reshaping
-
-

ExtensionArray
^^^^^^^^^^^^^^

- :meth:`Series.combine()` works correctly with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`)
- :meth:`Series.combine()` with scalar argument now works for any function type (:issue:`21248`)
-
-

Other
^^^^^

- :meth:`~pandas.io.formats.style.Styler.background_gradient` now takes a ``text_color_threshold`` parameter to automatically lighten the text color based on the luminance of the background color. This improves readability with dark background colors without the need to limit the background colormap range. (:issue:`21258`)
-
-
-
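A minimal usage sketch of the two ExtensionArray entries above (values and functions are illustrative; assumes a pandas build containing this commit):

import pandas as pd

# Series backed by an ExtensionArray (here Categorical) now combine
# element by element instead of raising (GH 20825).
s1 = pd.Series(pd.Categorical(["a", "b", "c"]))
s2 = pd.Series(pd.Categorical(["b", "b", "a"]))
s1.combine(s2, lambda x, y: x <= y)   # boolean Series: True, True, False

# A scalar `other` now works with any Python function, not only
# functions that accept whole arrays (GH 21248).
s = pd.Series([0, 10, 20])
s.combine(15, min)                    # 0, 10, 15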
30 changes: 25 additions & 5 deletions pandas/core/series.py
@@ -2204,7 +2204,7 @@ def _binop(self, other, func, level=None, fill_value=None):
result.name = None
return result

- def combine(self, other, func, fill_value=np.nan):
+ def combine(self, other, func, fill_value=None):
"""
Perform elementwise binary operation on two Series using the given function
with optional fill value when an index is missing from one Series or
@@ -2216,6 +2216,8 @@ def combine(self, other, func, fill_value=None):
func : function
Function that takes two scalars as inputs and returns a scalar
fill_value : scalar value
By default, the NaN value appropriate for the underlying
dtype of the Series is used.
Returns
-------
@@ -2235,20 +2237,38 @@ def combine(self, other, func, fill_value=None):
Series.combine_first : Combine Series values, choosing the calling
Series's values first
"""
if fill_value is None:
fill_value = na_value_for_dtype(self.dtype, compat=False)

if isinstance(other, Series):
# If other is a Series, result is based on union of Series,
# so do this element by element
new_index = self.index.union(other.index)
new_name = ops.get_op_result_name(self, other)
- new_values = np.empty(len(new_index), dtype=self.dtype)
- for i, idx in enumerate(new_index):
+ new_values = []
+ for idx in new_index:
lv = self.get(idx, fill_value)
rv = other.get(idx, fill_value)
with np.errstate(all='ignore'):
- new_values[i] = func(lv, rv)
+ new_values.append(func(lv, rv))
else:
# Assume that other is a scalar, so apply the function for
# each element in the Series
new_index = self.index
with np.errstate(all='ignore'):
- new_values = func(self._values, other)
+ new_values = [func(lv, other) for lv in self._values]
new_name = self.name

if is_categorical_dtype(self.values):
pass
elif is_extension_array_dtype(self.values):
# The function can return something of any type, so check
# if the type is compatible with the calling EA
try:
new_values = self._values._from_sequence(new_values)
except TypeError:
pass

return self._constructor(new_values, index=new_index, name=new_name)

def combine_first(self, other):
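A short sketch of the documented default for ``fill_value``: when the two indexes differ, missing labels fall back to the NA value for the Series dtype (np.nan for float64 here; example values are assumed):

import pandas as pd

s1 = pd.Series([1.0, 2.0], index=["a", "b"])
s2 = pd.Series([10.0, 20.0], index=["b", "c"])

# The union of the indexes is used; "a" and "c" are filled with the
# dtype's NA value before func is applied, so they come back as NaN.
s1.combine(s2, lambda x, y: x + y)
# a     NaN
# b    12.0
# c     NaN
# dtype: float64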
34 changes: 34 additions & 0 deletions pandas/tests/extension/base/methods.py
@@ -103,3 +103,37 @@ def test_factorize_equivalence(self, data_for_grouping, na_sentinel):

tm.assert_numpy_array_equal(l1, l2)
self.assert_extension_array_equal(u1, u2)

def test_combine_le(self, data_repeated):
# GH 20825
# Test that combine works when doing a <= (le) comparison
orig_data1, orig_data2 = data_repeated(2)
s1 = pd.Series(orig_data1)
s2 = pd.Series(orig_data2)
result = s1.combine(s2, lambda x1, x2: x1 <= x2)
expected = pd.Series([a <= b for (a, b) in
zip(list(orig_data1), list(orig_data2))])
self.assert_series_equal(result, expected)

val = s1.iloc[0]
result = s1.combine(val, lambda x1, x2: x1 <= x2)
expected = pd.Series([a <= val for a in list(orig_data1)])
self.assert_series_equal(result, expected)

def test_combine_add(self, data_repeated):
# GH 20825
orig_data1, orig_data2 = data_repeated(2)
s1 = pd.Series(orig_data1)
s2 = pd.Series(orig_data2)
result = s1.combine(s2, lambda x1, x2: x1 + x2)
expected = pd.Series(
orig_data1._from_sequence([a + b for (a, b) in
zip(list(orig_data1),
list(orig_data2))]))
self.assert_series_equal(result, expected)

val = s1.iloc[0]
result = s1.combine(val, lambda x1, x2: x1 + x2)
expected = pd.Series(
orig_data1._from_sequence([a + val for a in list(orig_data1)]))
self.assert_series_equal(result, expected)
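These base tests are inherited by each extension-array suite; a downstream array would opt in roughly as below. ``MyArray`` and ``make_my_data`` are hypothetical placeholders following the pattern of the fixtures added later in this commit:

import pytest

from pandas.tests.extension import base

@pytest.fixture
def data_repeated():
    def gen(count):
        for _ in range(count):
            yield MyArray(make_my_data())   # hypothetical constructor/helper
    yield gen

class TestMethods(base.BaseMethodsTests):
    # Inherits test_combine_le / test_combine_add above; the other base
    # fixtures (data, data_missing, ...) must be provided as well.
    pass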
26 changes: 26 additions & 0 deletions pandas/tests/extension/category/test_categorical.py
@@ -1,6 +1,7 @@
import string

import pytest
import pandas as pd
import numpy as np

from pandas.api.types import CategoricalDtype
@@ -29,6 +30,15 @@ def data_missing():
return Categorical([np.nan, 'A'])


@pytest.fixture
def data_repeated():
"""Return different versions of data for count times"""
def gen(count):
for _ in range(count):
yield Categorical(make_data())
yield gen


@pytest.fixture
def data_for_sorting():
return Categorical(['A', 'B', 'C'], categories=['C', 'A', 'B'],
@@ -154,6 +164,22 @@ class TestMethods(base.BaseMethodsTests):
def test_value_counts(self, all_data, dropna):
pass

def test_combine_add(self, data_repeated):
# GH 20825
# When adding categoricals in combine, result is a string
orig_data1, orig_data2 = data_repeated(2)
s1 = pd.Series(orig_data1)
s2 = pd.Series(orig_data2)
result = s1.combine(s2, lambda x1, x2: x1 + x2)
expected = pd.Series(([a + b for (a, b) in
zip(list(orig_data1), list(orig_data2))]))
self.assert_series_equal(result, expected)

val = s1.iloc[0]
result = s1.combine(val, lambda x1, x2: x1 + x2)
expected = pd.Series([a + val for a in list(orig_data1)])
self.assert_series_equal(result, expected)


class TestCasting(base.BaseCastingTests):
pass
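For context on the categorical override above: the new ``combine`` deliberately skips ExtensionArray reconstruction for categoricals (the ``is_categorical_dtype`` branch in series.py), so adding two categorical Series yields plain concatenated strings in an object-dtype Series, for example (values assumed):

import pandas as pd

s1 = pd.Series(pd.Categorical(["a", "b"]))
s2 = pd.Series(pd.Categorical(["x", "y"]))
s1.combine(s2, lambda x, y: x + y)
# 0    ax
# 1    by
# dtype: object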
9 changes: 9 additions & 0 deletions pandas/tests/extension/conftest.py
@@ -30,6 +30,15 @@ def all_data(request, data, data_missing):
return data_missing


@pytest.fixture
def data_repeated():
"""Return different versions of data for count times"""
def gen(count):
for _ in range(count):
yield NotImplementedError
yield gen


@pytest.fixture
def data_for_sorting():
"""Length-3 array with a known sort order.
4 changes: 3 additions & 1 deletion pandas/tests/extension/decimal/array.py
@@ -28,7 +28,9 @@ class DecimalArray(ExtensionArray):
dtype = DecimalDtype()

def __init__(self, values):
- assert all(isinstance(v, decimal.Decimal) for v in values)
+ for val in values:
+     if not isinstance(val, self.dtype.type):
+         raise TypeError
values = np.asarray(values, dtype=object)

self._data = values
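Raising ``TypeError`` here (instead of an ``assert``) is presumably what lets the new ``combine`` fall back gracefully: ``_from_sequence`` ends up calling this constructor, and ``combine`` catches ``TypeError`` when ``func`` returns non-Decimal values (such as the booleans produced in ``test_combine_le``). A quick sketch, assuming the test array is importable from the path shown in the diff:

import decimal

from pandas.tests.extension.decimal.array import DecimalArray

DecimalArray([decimal.Decimal("1.5")])   # accepted
try:
    DecimalArray([1.5])                  # a float, not a decimal.Decimal
except TypeError:
    print("rejected with TypeError (previously an AssertionError)")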
8 changes: 8 additions & 0 deletions pandas/tests/extension/decimal/test_decimal.py
@@ -25,6 +25,14 @@ def data_missing():
return DecimalArray([decimal.Decimal('NaN'), decimal.Decimal(1)])


@pytest.fixture
def data_repeated():
def gen(count):
for _ in range(count):
yield DecimalArray(make_data())
yield gen


@pytest.fixture
def data_for_sorting():
return DecimalArray([decimal.Decimal('1'),
8 changes: 8 additions & 0 deletions pandas/tests/extension/json/test_json.py
@@ -187,6 +187,14 @@ def test_sort_values_missing(self, data_missing_for_sorting, ascending):
super(TestMethods, self).test_sort_values_missing(
data_missing_for_sorting, ascending)

@pytest.mark.skip(reason="combine for JSONArray not supported")
def test_combine_le(self, data_repeated):
pass

@pytest.mark.skip(reason="combine for JSONArray not supported")
def test_combine_add(self, data_repeated):
pass


class TestCasting(BaseJSON, base.BaseCastingTests):
@pytest.mark.xfail
13 changes: 13 additions & 0 deletions pandas/tests/series/test_combine_concat.py
@@ -60,6 +60,19 @@ def test_append_duplicates(self):
with tm.assert_raises_regex(ValueError, msg):
pd.concat([s1, s2], verify_integrity=True)

def test_combine_scalar(self):
# GH 21248
# Note - combine() with another Series is tested elsewhere because
# it is used when testing operators
s = pd.Series([i * 10 for i in range(5)])
result = s.combine(3, lambda x, y: x + y)
expected = pd.Series([i * 10 + 3 for i in range(5)])
tm.assert_series_equal(result, expected)

result = s.combine(22, lambda x, y: min(x, y))
expected = pd.Series([min(i * 10, 22) for i in range(5)])
tm.assert_series_equal(result, expected)

def test_combine_first(self):
values = tm.makeIntIndex(20).values.astype(float)
series = Series(values, index=tm.makeIntIndex(20))
