diff --git a/databricks/koalas/indexes.py b/databricks/koalas/indexes.py
index af1460d484..d12e7d18f0 100644
--- a/databricks/koalas/indexes.py
+++ b/databricks/koalas/indexes.py
@@ -571,6 +571,35 @@ def __getattr__(self, item: str) -> Any:
     def rename(self, name, inplace=False):
         raise NotImplementedError()
 
+    @property
+    def levels(self) -> list:
+        """
+        Sorted unique values of each index level, as a list of lists.
+
+        .. note:: Be aware of the possibility of running into an out-of-memory
+                  issue if the returned list is huge.
+
+        Examples
+        --------
+        >>> mi = pd.MultiIndex.from_arrays((list('abc'), list('def')))
+        >>> mi.names = ['level_1', 'level_2']
+        >>> kdf = ks.DataFrame({'a': [1, 2, 3]}, index=mi)
+        >>> kdf.index.levels
+        [['a', 'b', 'c'], ['d', 'e', 'f']]
+
+        >>> mi = pd.MultiIndex.from_arrays((list('bac'), list('fee')))
+        >>> mi.names = ['level_1', 'level_2']
+        >>> kdf = ks.DataFrame({'a': [1, 2, 3]}, index=mi)
+        >>> kdf.index.levels
+        [['a', 'b', 'c'], ['e', 'f']]
+        """
+        scols = self._kdf._internal.index_scols
+        row = self._kdf._sdf.select([F.collect_set(scol) for scol in scols]).first()
+
+        # Sort each level because pandas ignores the order in which values appear,
+        # so e.g. ['b', 'd', 'a'] is returned as ['a', 'b', 'd'].
+        return [sorted(col) for col in row]
+
     def __repr__(self):
         max_display_count = get_option("display.max_rows")
         if max_display_count is None:
diff --git a/databricks/koalas/missing/indexes.py b/databricks/koalas/missing/indexes.py
index 1b64271821..3b5c9b06d9 100644
--- a/databricks/koalas/missing/indexes.py
+++ b/databricks/koalas/missing/indexes.py
@@ -128,7 +128,6 @@ class _MissingPandasLikeMultiIndex(object):
     T = unsupported_property('T')
     codes = unsupported_property('codes')
     is_all_dates = unsupported_property('is_all_dates')
-    levels = unsupported_property('levels')
     levshape = unsupported_property('levshape')
     shape = unsupported_property('shape')
 
diff --git a/databricks/koalas/tests/test_indexes.py b/databricks/koalas/tests/test_indexes.py
index d9bcf5ab31..d62edc1f9f 100644
--- a/databricks/koalas/tests/test_indexes.py
+++ b/databricks/koalas/tests/test_indexes.py
@@ -283,3 +283,15 @@ def test_multiindex_nlevel(self):
         kdf = ks.from_pandas(pdf)
 
         self.assertEqual(kdf.index.nlevels, 2)
+
+    def test_multiindex_levels(self):
+        tuples = [[list('abc'), list('def')], [list('aac'), list('fed')]]
+
+        for tup in tuples:
+            pdf = pd.DataFrame({'a': [1, 2, 3]}, index=tup)
+            kdf = ks.from_pandas(pdf)
+
+            # pandas returns a FrozenList, so convert it to a plain list
+            # for comparison
+            pdf_levels = [list(i) for i in pdf.index.levels]
+            self.assertEqual(pdf_levels, kdf.index.levels)
diff --git a/docs/source/reference/indexing.rst b/docs/source/reference/indexing.rst
index 0a2904ea70..be16e94cd2 100644
--- a/docs/source/reference/indexing.rst
+++ b/docs/source/reference/indexing.rst
@@ -78,6 +78,7 @@ MultiIndex Properties
 
    MultiIndex.names
    MultiIndex.ndim
+   MultiIndex.levels
 
 MultiIndex Modifying and computations
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~