Skip to content

Commit

Permalink
TST: Check merging on equivalent CategoricalDtype
Browse files Browse the repository at this point in the history
  • Loading branch information
gfyoung committed Mar 28, 2019
1 parent 882961d commit 69eda22
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 0 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.0.rst
Expand Up @@ -358,6 +358,7 @@ Reshaping
- :func:`to_records` now accepts dtypes to its `column_dtypes` parameter (:issue:`24895`)
- Bug in :func:`concat` where the order of an ``OrderedDict`` (and of a ``dict`` in Python 3.6+) was not respected when passed in as the ``objs`` argument (:issue:`21510`)
- Bug in :func:`concat` where the resulting ``freq`` of two :class:`DatetimeIndex` with the same ``freq`` would be dropped (:issue:`3232`).
- Bug in :func:`merge` where merging with equivalent Categorical dtypes raised an error (:issue:`22501`)

Sparse
^^^^^^
Expand Down
65 changes: 65 additions & 0 deletions pandas/tests/reshape/merge/test_merge.py
Expand Up @@ -1666,3 +1666,68 @@ def test_merge_suffix_none_error(col1, col2, suffixes):
msg = "iterable"
with pytest.raises(TypeError, match=msg):
pd.merge(a, b, left_index=True, right_index=True, suffixes=suffixes)


@pytest.mark.parametrize("cat_dtype", ["one", "two"])
@pytest.mark.parametrize("reverse", [True, False])
def test_merge_equal_cat_dtypes(cat_dtype, reverse):
    """Merge two frames on categorical indexes that share an equal dtype.

    The left frame always uses the ``"one"`` dtype; the right frame uses
    the dtype selected by ``cat_dtype``. When ``reverse`` is truthy the
    right frame's rows are supplied in reversed order. In every case the
    merged frame is compared against the same expected frame.
    """
    # see gh-22501
    labels = ["a", "b", "c"]

    # Two separately constructed dtype objects with identical parameters.
    dtype_by_key = {
        key: CategoricalDtype(categories=["a", "b", "c"], ordered=False)
        for key in ("one", "two")
    }
    left_dtype = dtype_by_key["one"]
    right_dtype = dtype_by_key[cat_dtype]

    left_frame = DataFrame({
        "foo": Series(labels).astype(left_dtype),
        "left": [1, 2, 3],
    }).set_index("foo")

    # A step of -1 yields reversed copies; a step of 1 yields plain copies.
    step = -1 if reverse else 1
    right_labels = labels[::step]
    right_values = [1, 2, 3][::step]

    right_frame = DataFrame({
        "foo": Series(right_labels).astype(right_dtype),
        "right": right_values,
    }).set_index("foo")

    merged = left_frame.merge(right_frame, left_index=True, right_index=True)

    expected = DataFrame({
        "left": [1, 2, 3],
        "right": [1, 2, 3],
        "foo": Series(labels).astype(left_dtype),
    }).set_index("foo")

    # The dtype is unordered (ordered=False), so the categorical check is
    # switched off for the comparison.
    tm.assert_frame_equal(merged, expected, check_categorical=False)


def test_merge_equal_cat_dtypes2():
    """Merge frames whose categorical indexes share one dtype object.

    The left index holds only ``"a"`` and ``"b"`` while the right index
    holds all three categories; the merged frame is expected to contain
    just the ``"a"`` and ``"b"`` rows.
    """
    # see gh-22501
    shared_dtype = CategoricalDtype(categories=["a", "b", "c"], ordered=False)

    # Test Data
    left_keys = Series(["a", "b"]).astype(shared_dtype)
    left_frame = DataFrame({"foo": left_keys, "left": [1, 2]})
    left_frame = left_frame.set_index("foo")

    right_keys = Series(["a", "b", "c"]).astype(shared_dtype)
    right_frame = DataFrame({"foo": right_keys, "right": [3, 2, 1]})
    right_frame = right_frame.set_index("foo")

    merged = left_frame.merge(right_frame, left_index=True, right_index=True)

    expected_keys = Series(["a", "b"]).astype(shared_dtype)
    expected = DataFrame({
        "left": [1, 2],
        "right": [3, 2],
        "foo": expected_keys,
    })
    expected = expected.set_index("foo")

    # The dtype is unordered (ordered=False), so the categorical check is
    # switched off for the comparison.
    tm.assert_frame_equal(merged, expected, check_categorical=False)

0 comments on commit 69eda22

Please sign in to comment.