diff --git a/databricks/koalas/frame.py b/databricks/koalas/frame.py
index 0cfd127dde..6af8faaf9e 100644
--- a/databricks/koalas/frame.py
+++ b/databricks/koalas/frame.py
@@ -4229,6 +4229,71 @@ def head(self, n=5):
 
         return DataFrame(self._internal.copy(sdf=self._sdf.limit(n)))
 
+    def tail(self, n=5):
+        """
+        Return the last `n` rows.
+
+        This function returns the last `n` rows for the object based
+        on position. It is useful for quickly verifying data,
+        for example, after sorting or appending rows.
+
+        Parameters
+        ----------
+        n : int, default 5
+            Number of rows to select.
+
+        Returns
+        -------
+        obj_tail : same type as caller
+            The last `n` rows of the caller object, in their original order.
+
+        Examples
+        --------
+        >>> df = ks.DataFrame({'animal':['alligator', 'bee', 'falcon', 'lion',
+        ...                    'monkey', 'parrot', 'shark', 'whale', 'zebra']})
+        >>> df
+              animal
+        0  alligator
+        1        bee
+        2     falcon
+        3       lion
+        4     monkey
+        5     parrot
+        6      shark
+        7      whale
+        8      zebra
+
+        Viewing the last 5 lines
+
+        >>> df.tail()
+           animal
+        4  monkey
+        5  parrot
+        6   shark
+        7   whale
+        8   zebra
+
+        Viewing the last `n` lines (three in this case)
+
+        >>> df.tail(3)
+           animal
+        6   shark
+        7   whale
+        8   zebra
+        """
+        # Tag every row with an id that increases with the current row order
+        # (the ids are increasing and unique, but not consecutive).
+        tmp_col = '__order__'
+        sdf = self._sdf.withColumn(tmp_col, F.monotonically_increasing_id())
+
+        # Keep the `n` rows with the largest ids, i.e. the last `n` rows ...
+        sdf = sdf.orderBy(tmp_col, ascending=False).limit(n)
+        # ... then put them back in their original order, as pandas does, and
+        # drop the helper column so it does not leak into the result.
+        sdf = sdf.orderBy(tmp_col).drop(tmp_col)
+
+        return DataFrame(self._internal.copy(sdf=sdf))
+
     def pivot_table(self, values=None, index=None, columns=None,
                     aggfunc='mean', fill_value=None):
         """
diff --git a/databricks/koalas/missing/frame.py b/databricks/koalas/missing/frame.py
index bdb9963d77..9f5ea643a9 100644
--- a/databricks/koalas/missing/frame.py
+++ b/databricks/koalas/missing/frame.py
@@ -86,7 +86,6 @@ class _MissingPandasLikeDataFrame(object):
     stack = unsupported_function('stack')
     swapaxes = unsupported_function('swapaxes')
     swaplevel = unsupported_function('swaplevel')
-    tail = unsupported_function('tail')
     take = unsupported_function('take')
     to_feather = unsupported_function('to_feather')
     to_gbq = unsupported_function('to_gbq')
diff --git a/databricks/koalas/missing/series.py b/databricks/koalas/missing/series.py
index 51071bc963..8eaa151950 100644
--- a/databricks/koalas/missing/series.py
+++ b/databricks/koalas/missing/series.py
@@ -95,7 +95,6 @@ class _MissingPandasLikeSeries(object):
     squeeze = unsupported_function('squeeze')
     swapaxes = unsupported_function('swapaxes')
     swaplevel = unsupported_function('swaplevel')
-    tail = unsupported_function('tail')
     take = unsupported_function('take')
     to_hdf = unsupported_function('to_hdf')
     to_period = unsupported_function('to_period')
diff --git a/databricks/koalas/series.py b/databricks/koalas/series.py
index 0fd25c07bf..3d5e69ce30 100644
--- a/databricks/koalas/series.py
+++ b/databricks/koalas/series.py
@@ -1736,6 +1736,34 @@ def head(self, n=5):
         """
         return _col(self.to_dataframe().head(n))
 
+    def tail(self, n=5):
+        """
+        Return the last `n` rows.
+
+        This function returns the last `n` rows for the object based on position.
+        It is useful for quickly verifying data,
+        for example, after sorting or appending rows.
+
+        Parameters
+        ----------
+        n : int, default 5
+            Number of rows to select.
+
+        Returns
+        -------
+        obj_tail : same type as caller
+            The last `n` rows of the caller object, in their original order.
+
+        Examples
+        --------
+        >>> df = ks.DataFrame({'animal':['alligator', 'bee', 'falcon', 'lion']})
+        >>> df.animal.tail(2)  # doctest: +NORMALIZE_WHITESPACE
+        2    falcon
+        3      lion
+        Name: animal, dtype: object
+        """
+        return _col(self.to_dataframe().tail(n))
+
     # TODO: Categorical type isn't supported (due to PySpark's limitation) and
     # some doctests related with timestamps were not added.
     def unique(self):
diff --git a/docs/source/reference/frame.rst b/docs/source/reference/frame.rst
index ce740aa4e7..72907ebaed 100644
--- a/docs/source/reference/frame.rst
+++ b/docs/source/reference/frame.rst
@@ -148,6 +148,7 @@ Reindexing / Selection / Label manipulation
    DataFrame.set_index
    DataFrame.isin
    DataFrame.sample
+   DataFrame.tail
 
 .. _api.dataframe.missing:
 
diff --git a/docs/source/reference/series.rst b/docs/source/reference/series.rst
index 381b4d0637..ae8fdc676b 100644
--- a/docs/source/reference/series.rst
+++ b/docs/source/reference/series.rst
@@ -152,6 +152,7 @@ Reindexing / Selection / Label manipulation
    Series.rename
    Series.reset_index
    Series.sample
+   Series.tail
    Series.where
    Series.mask
    Series.truncate