Skip to content

Commit

Permalink
API: disallow duplicate level names (pandas-dev#18882)
Browse files Browse the repository at this point in the history
  • Loading branch information
toobaz authored and hexgnu committed Jan 1, 2018
1 parent 48ae9dc commit 01e4216
Show file tree
Hide file tree
Showing 11 changed files with 55 additions and 87 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ Other API Changes
- A :class:`Series` of ``dtype=category`` constructed from an empty ``dict`` will now have categories of ``dtype=object`` rather than ``dtype=float64``, consistently with the case in which an empty list is passed (:issue:`18515`)
- ``NaT`` division with :class:`datetime.timedelta` will now return ``NaN`` instead of raising (:issue:`17876`)
- All-NaN levels in a ``MultiIndex`` are now assigned ``float`` rather than ``object`` dtype, promoting consistency with ``Index`` (:issue:`17929`).
- Levels names of a ``MultiIndex`` (when not None) are now required to be unique: trying to create a ``MultiIndex`` with repeated names will raise a ``ValueError`` (:issue:`18872`)
- :class:`Timestamp` will no longer silently ignore unused or invalid ``tz`` or ``tzinfo`` keyword arguments (:issue:`17690`)
- :class:`Timestamp` will no longer silently ignore invalid ``freq`` arguments (:issue:`5168`)
- :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module (:issue:`17830`)
Expand Down
15 changes: 8 additions & 7 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -579,23 +579,24 @@ def _set_names(self, names, level=None, validate=True):

if level is None:
level = range(self.nlevels)
used = {}
else:
level = [self._get_level_number(l) for l in level]
used = {self.levels[l].name: l
for l in set(range(self.nlevels)) - set(level)}

# set the name
for l, name in zip(level, names):
if name is not None and name in used:
raise ValueError('Duplicated level name: "{}", assigned to '
'level {}, is already used for level '
'{}.'.format(name, l, used[name]))
self.levels[l].rename(name, inplace=True)
used[name] = l

names = property(fset=_set_names, fget=_get_names,
doc="Names of levels in MultiIndex")

def _reference_duplicate_name(self, name):
"""
Returns True if the name refered to in self.names is duplicated.
"""
# count the times name equals an element in self.names.
return sum(name == n for n in self.names) > 1

def _format_native_types(self, na_rep='nan', **kwargs):
new_levels = []
new_labels = []
Expand Down
11 changes: 0 additions & 11 deletions pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,12 +91,6 @@ def __init__(self, values, index, level=-1, value_columns=None,

self.index = index

if isinstance(self.index, MultiIndex):
if index._reference_duplicate_name(level):
msg = ("Ambiguous reference to {level}. The index "
"names are not unique.".format(level=level))
raise ValueError(msg)

self.level = self.index._get_level_number(level)

# when index includes `nan`, need to lift levels/strides by 1
Expand Down Expand Up @@ -502,11 +496,6 @@ def factorize(index):
return categories, codes

N, K = frame.shape
if isinstance(frame.columns, MultiIndex):
if frame.columns._reference_duplicate_name(level):
msg = ("Ambiguous reference to {level}. The column "
"names are not unique.".format(level=level))
raise ValueError(msg)

# Will also convert negative level numbers and check if out of bounds.
level_num = frame.columns._get_level_number(level)
Expand Down
36 changes: 19 additions & 17 deletions pandas/tests/frame/test_alter_axes.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,20 @@ def test_set_index2(self):
result = df.set_index(df.C)
assert result.index.name == 'C'

@pytest.mark.parametrize('level', ['a', pd.Series(range(3), name='a')])
def test_set_index_duplicate_names(self, level):
# GH18872
df = pd.DataFrame(np.arange(8).reshape(4, 2), columns=['a', 'b'])

# Pass an existing level name:
df.index.name = 'a'
pytest.raises(ValueError, df.set_index, level, append=True)
pytest.raises(ValueError, df.set_index, [level], append=True)

# Pass twice the same level name:
df.index.name = 'c'
pytest.raises(ValueError, df.set_index, [level, level])

def test_set_index_nonuniq(self):
df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'],
'B': ['one', 'two', 'three', 'one', 'two'],
Expand Down Expand Up @@ -591,19 +605,6 @@ def test_reorder_levels(self):
index=e_idx)
assert_frame_equal(result, expected)

result = df.reorder_levels([0, 0, 0])
e_idx = MultiIndex(levels=[['bar'], ['bar'], ['bar']],
labels=[[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0]],
names=['L0', 'L0', 'L0'])
expected = DataFrame({'A': np.arange(6), 'B': np.arange(6)},
index=e_idx)
assert_frame_equal(result, expected)

result = df.reorder_levels(['L0', 'L0', 'L0'])
assert_frame_equal(result, expected)

def test_reset_index(self):
stacked = self.frame.stack()[::2]
stacked = DataFrame({'foo': stacked, 'bar': stacked})
Expand Down Expand Up @@ -831,7 +832,7 @@ def test_set_index_names(self):

mi = MultiIndex.from_arrays(df[['A', 'B']].T.values, names=['A', 'B'])
mi2 = MultiIndex.from_arrays(df[['A', 'B', 'A', 'B']].T.values,
names=['A', 'B', 'A', 'B'])
names=['A', 'B', 'C', 'D'])

df = df.set_index(['A', 'B'])

Expand All @@ -843,13 +844,14 @@ def test_set_index_names(self):
# Check actual equality
tm.assert_index_equal(df.set_index(df.index).index, mi)

idx2 = df.index.rename(['C', 'D'])

# Check that [MultiIndex, MultiIndex] yields a MultiIndex rather
# than a pair of tuples
assert isinstance(df.set_index(
[df.index, df.index]).index, MultiIndex)
assert isinstance(df.set_index([df.index, idx2]).index, MultiIndex)

# Check equality
tm.assert_index_equal(df.set_index([df.index, df.index]).index, mi2)
tm.assert_index_equal(df.set_index([df.index, idx2]).index, mi2)

def test_rename_objects(self):
renamed = self.mixed_frame.rename(columns=str.upper)
Expand Down
10 changes: 0 additions & 10 deletions pandas/tests/frame/test_reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,16 +560,6 @@ def test_unstack_dtypes(self):
assert left.shape == (3, 2)
tm.assert_frame_equal(left, right)

def test_unstack_non_unique_index_names(self):
idx = MultiIndex.from_tuples([('a', 'b'), ('c', 'd')],
names=['c1', 'c1'])
df = DataFrame([1, 2], index=idx)
with pytest.raises(ValueError):
df.unstack('c1')

with pytest.raises(ValueError):
df.T.stack('c1')

def test_unstack_nan_index(self): # GH7466
cast = lambda val: '{0:1}'.format('' if val != val else val)
nan = np.nan
Expand Down
8 changes: 6 additions & 2 deletions pandas/tests/groupby/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,15 +388,19 @@ def test_groupby_multi_categorical_as_index(self):
columns=['cat', 'A', 'B'])
tm.assert_frame_equal(result, expected)

# another not in-axis grouper (conflicting names in index)
s = Series(['a', 'b', 'b'], name='cat')
# another not in-axis grouper
s = Series(['a', 'b', 'b'], name='cat2')
result = df.groupby(['cat', s], as_index=False).sum()
expected = DataFrame({'cat': Categorical([1, 1, 2, 2, 3, 3]),
'A': [10.0, nan, nan, 22.0, nan, nan],
'B': [101.0, nan, nan, 205.0, nan, nan]},
columns=['cat', 'A', 'B'])
tm.assert_frame_equal(result, expected)

# GH18872: conflicting names in desired index
pytest.raises(ValueError, lambda: df.groupby(['cat',
s.rename('cat')]).sum())

# is original index dropped?
expected = DataFrame({'cat': Categorical([1, 1, 2, 2, 3, 3]),
'A': [10, 11, 10, 11, 10, 11],
Expand Down
31 changes: 17 additions & 14 deletions pandas/tests/indexes/test_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,15 +536,6 @@ def test_names(self):
level_names = [level.name for level in index.levels]
assert ind_names == level_names

def test_reference_duplicate_name(self):
idx = MultiIndex.from_tuples(
[('a', 'b'), ('c', 'd')], names=['x', 'x'])
assert idx._reference_duplicate_name('x')

idx = MultiIndex.from_tuples(
[('a', 'b'), ('c', 'd')], names=['x', 'y'])
assert not idx._reference_duplicate_name('x')

def test_astype(self):
expected = self.index.copy()
actual = self.index.astype('O')
Expand Down Expand Up @@ -609,6 +600,23 @@ def test_constructor_mismatched_label_levels(self):
with tm.assert_raises_regex(ValueError, label_error):
self.index.copy().set_labels([[0, 0, 0, 0], [0, 0]])

@pytest.mark.parametrize('names', [['a', 'b', 'a'], [1, 1, 2],
[1, 'a', 1]])
def test_duplicate_level_names(self, names):
# GH18872
pytest.raises(ValueError, pd.MultiIndex.from_product,
[[0, 1]] * 3, names=names)

# With .rename()
mi = pd.MultiIndex.from_product([[0, 1]] * 3)
tm.assert_raises_regex(ValueError, "Duplicated level name:",
mi.rename, names)

# With .rename(., level=)
mi.rename(names[0], level=1, inplace=True)
tm.assert_raises_regex(ValueError, "Duplicated level name:",
mi.rename, names[:2], level=[0, 2])

def assert_multiindex_copied(self, copy, original):
# Levels should be (at least, shallow copied)
tm.assert_copy(copy.levels, original.levels)
Expand Down Expand Up @@ -667,11 +675,6 @@ def test_changing_names(self):
shallow_copy.names = [name + "c" for name in shallow_copy.names]
self.check_level_names(self.index, new_names)

def test_duplicate_names(self):
self.index.names = ['foo', 'foo']
tm.assert_raises_regex(KeyError, 'Level foo not found',
self.index._get_level_number, 'foo')

def test_get_level_number_integer(self):
self.index.names = [1, 0]
assert self.index._get_level_number(1) == 0
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/io/formats/test_to_latex.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,8 +589,8 @@ def test_to_latex_no_bold_rows(self):
"""
assert observed == expected

@pytest.mark.parametrize('name0', [None, 'named'])
@pytest.mark.parametrize('name1', [None, 'named'])
@pytest.mark.parametrize('name0', [None, 'named0'])
@pytest.mark.parametrize('name1', [None, 'named1'])
@pytest.mark.parametrize('axes', [[0], [1], [0, 1]])
def test_to_latex_multiindex_names(self, name0, name1, axes):
# GH 18667
Expand Down
6 changes: 0 additions & 6 deletions pandas/tests/io/test_pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -1908,12 +1908,6 @@ def make_index(names=None):
'a', 'b'], index=make_index(['date', 'a', 't']))
pytest.raises(ValueError, store.append, 'df', df)

# dup within level
_maybe_remove(store, 'df')
df = DataFrame(np.zeros((12, 2)), columns=['a', 'b'],
index=make_index(['date', 'date', 'date']))
pytest.raises(ValueError, store.append, 'df', df)

# fully names
_maybe_remove(store, 'df')
df = DataFrame(np.zeros((12, 2)), columns=[
Expand Down
9 changes: 2 additions & 7 deletions pandas/tests/reshape/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -1623,14 +1623,9 @@ def test_crosstab_with_numpy_size(self):
tm.assert_frame_equal(result, expected)

def test_crosstab_dup_index_names(self):
# GH 13279
# GH 13279, GH 18872
s = pd.Series(range(3), name='foo')
result = pd.crosstab(s, s)
expected_index = pd.Index(range(3), name='foo')
expected = pd.DataFrame(np.eye(3, dtype=np.int64),
index=expected_index,
columns=expected_index)
tm.assert_frame_equal(result, expected)
pytest.raises(ValueError, pd.crosstab, s, s)

@pytest.mark.parametrize("names", [['a', ('b', 'c')],
[('a', 'b'), 'c']])
Expand Down
11 changes: 0 additions & 11 deletions pandas/tests/series/test_alter_axes.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,17 +214,6 @@ def test_reorder_levels(self):
expected = Series(np.arange(6), index=e_idx)
assert_series_equal(result, expected)

result = s.reorder_levels([0, 0, 0])
e_idx = MultiIndex(levels=[['bar'], ['bar'], ['bar']],
labels=[[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0]],
names=['L0', 'L0', 'L0'])
expected = Series(np.arange(6), index=e_idx)
assert_series_equal(result, expected)

result = s.reorder_levels(['L0', 'L0', 'L0'])
assert_series_equal(result, expected)

def test_rename_axis_inplace(self):
# GH 15704
series = self.ts.copy()
Expand Down

0 comments on commit 01e4216

Please sign in to comment.