Skip to content

Commit

Permalink
Implements tail() for DataFrame & Series
Browse files Browse the repository at this point in the history
  • Loading branch information
itholic committed Nov 19, 2019
1 parent 221b5a2 commit ab27a0b
Show file tree
Hide file tree
Showing 6 changed files with 85 additions and 2 deletions.
57 changes: 57 additions & 0 deletions databricks/koalas/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4229,6 +4229,63 @@ def head(self, n=5):

return DataFrame(self._internal.copy(sdf=self._sdf.limit(n)))

def tail(self, n=5):
    """
    Return the last `n` rows.

    This function returns the last `n` rows for the object based
    on position. It is useful for quickly verifying data,
    for example, after sorting or appending rows.

    The selected rows are returned in their original order, which is
    what :meth:`pandas.DataFrame.tail` does.

    Parameters
    ----------
    n : int, default 5
        Number of rows to select.

    Returns
    -------
    obj_tail : same type as caller
        The last `n` rows of the caller object.

    Examples
    --------
    >>> df = ks.DataFrame({'animal':['alligator', 'bee', 'falcon', 'lion',
    ...                    'monkey', 'parrot', 'shark', 'whale', 'zebra']})
    >>> df
          animal
    0  alligator
    1        bee
    2     falcon
    3       lion
    4     monkey
    5     parrot
    6      shark
    7      whale
    8      zebra

    Viewing the last 5 lines

    >>> df.tail()
       animal
    4  monkey
    5  parrot
    6   shark
    7   whale
    8   zebra

    Viewing the last `n` lines (three in this case)

    >>> df.tail(3)
      animal
    6  shark
    7  whale
    8  zebra
    """
    # `withColumn` replaces an existing column that has the same name, so
    # choose a helper name that cannot clobber one of the caller's columns.
    tmp_col = '__order__'
    while tmp_col in self._sdf.columns:
        tmp_col = '_' + tmp_col

    # The generated ids increase with the row position, so they can be used
    # as a sort key that stands in for "position".
    sdf = self._sdf.withColumn(tmp_col, F.monotonically_increasing_id())

    # Sorting descending and limiting keeps the last `n` rows; sorting
    # ascending again restores the original row order, which the
    # descending sort would otherwise leave reversed.
    sdf = sdf.orderBy(tmp_col, ascending=False).limit(n).orderBy(tmp_col)

    # The helper column is an implementation detail; do not leak it into
    # the Spark frame backing the result.
    return DataFrame(self._internal.copy(sdf=sdf.drop(tmp_col)))

def pivot_table(self, values=None, index=None, columns=None,
aggfunc='mean', fill_value=None):
"""
Expand Down
1 change: 0 additions & 1 deletion databricks/koalas/missing/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ class _MissingPandasLikeDataFrame(object):
stack = unsupported_function('stack')
swapaxes = unsupported_function('swapaxes')
swaplevel = unsupported_function('swaplevel')
tail = unsupported_function('tail')
take = unsupported_function('take')
to_feather = unsupported_function('to_feather')
to_gbq = unsupported_function('to_gbq')
Expand Down
1 change: 0 additions & 1 deletion databricks/koalas/missing/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,6 @@ class _MissingPandasLikeSeries(object):
squeeze = unsupported_function('squeeze')
swapaxes = unsupported_function('swapaxes')
swaplevel = unsupported_function('swaplevel')
tail = unsupported_function('tail')
take = unsupported_function('take')
to_hdf = unsupported_function('to_hdf')
to_period = unsupported_function('to_period')
Expand Down
26 changes: 26 additions & 0 deletions databricks/koalas/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1736,6 +1736,32 @@ def head(self, n=5):
"""
return _col(self.to_dataframe().head(n))

def tail(self, n=5):
    """
    Return the last `n` rows.

    The selection is based on position and is delegated to
    ``DataFrame.tail`` on the frame produced by ``to_dataframe()``.
    It is useful for quickly verifying data, for example, after
    sorting or appending rows.

    Parameters
    ----------
    n : int, default 5
        Number of rows to select.

    Returns
    -------
    Series
        The last `n` rows of the caller object.

    Examples
    --------
    >>> df = ks.DataFrame({'animal':['alligator', 'bee', 'falcon', 'lion']})
    >>> df.animal.tail(1)  # doctest: +NORMALIZE_WHITESPACE
    3    lion
    Name: animal, dtype: object
    """
    # Reuse the DataFrame implementation, then convert the result back
    # with `_col`.
    last_rows = self.to_dataframe().tail(n)
    return _col(last_rows)

# TODO: Categorical type isn't supported (due to PySpark's limitation) and
# some doctests related with timestamps were not added.
def unique(self):
Expand Down
1 change: 1 addition & 0 deletions docs/source/reference/frame.rst
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ Reindexing / Selection / Label manipulation
DataFrame.set_index
DataFrame.isin
DataFrame.sample
DataFrame.tail

.. _api.dataframe.missing:

Expand Down
1 change: 1 addition & 0 deletions docs/source/reference/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ Reindexing / Selection / Label manipulation
Series.rename
Series.reset_index
Series.sample
Series.tail
Series.where
Series.mask
Series.truncate
Expand Down

0 comments on commit ab27a0b

Please sign in to comment.