Skip to content

Commit

Permalink
Update docs for select and where
Browse files Browse the repository at this point in the history
  • Loading branch information
SamLau95 committed Sep 14, 2016
1 parent 73f1dab commit df576ca
Showing 1 changed file with 101 additions and 77 deletions.
178 changes: 101 additions & 77 deletions datascience/tables.py
Expand Up @@ -22,6 +22,7 @@
import datascience.maps as _maps
import datascience.formats as _formats
import datascience.util as _util
from datascience.util import make_array
import datascience.predicates as _predicates

class Table(collections.abc.MutableMapping):
Expand Down Expand Up @@ -537,57 +538,60 @@ def copy(self, *, shallow=False):
return table

def select(self, *column_label_or_labels):
"""Return a Table with selected column or columns by label or index.
"""
Returns a new ``Table`` with only the columns in
``column_label_or_labels``.
Args:
``column_label_or_labels`` (string, list of strings, or several
separate argument strings): The header names or indices of the
columns to be selected. ``column_label_or_labels`` must
be an existing header name, or a valid column index, or a list
thereof.
``column_label_or_labels``: Columns to select from the ``Table`` as
either column labels (``str``) or column indices (``int``).
Returns:
An instance of ``Table`` containing only selected columns.
A new instance of ``Table`` containing only selected columns.
The columns of the new ``Table`` are in the order given in
``column_label_or_labels``.
Raises:
``KeyError`` if any of ``column_label_or_labels`` are not in the
table.
>>> flowers = Table().with_columns(
... 'Number of petals', make_array(8, 34, 5),
... 'Name', make_array('lotus', 'sunflower', 'rose'),
... 'Weight', make_array(10, 5, 6)
... )
>>> flowers
Number of petals | Name | Weight
8 | lotus | 10
34 | sunflower | 5
5 | rose | 6
>>> flowers.select('Number of petals', 'Weight')
Number of petals | Weight
8 | 10
34 | 5
5 | 6
>>> flowers # original table unchanged
Number of petals | Name | Weight
8 | lotus | 10
34 | sunflower | 5
5 | rose | 6
>>> flowers.select(0, 2)
Number of petals | Weight
8 | 10
34 | 5
5 | 6
>>> # You may also pass in a list
>>> flowers.select(['Number of petals', 'Weight'])
Number of petals | Weight
8 | 10
34 | 5
5 | 6
>>> t = Table().with_columns([
... 'burgers', ['cheeseburger', 'hamburger', 'veggie burger'],
... 'prices', [6, 5, 5],
... 'calories', [743, 651, 582]])
>>> t
burgers | prices | calories
cheeseburger | 6 | 743
hamburger | 5 | 651
veggie burger | 5 | 582
>>> t.select(['burgers', 'calories'])
burgers | calories
cheeseburger | 743
hamburger | 651
veggie burger | 582
>>> t.select('prices')
prices
6
5
5
>>> t.select('burgers', 'calories')
burgers | calories
cheeseburger | 743
hamburger | 651
veggie burger | 582
>>> t.select(1)
prices
6
5
5
>>> t.select([2, 0])
calories | burgers
743 | cheeseburger
651 | hamburger
582 | veggie burger
>>> t.select(2, 0)
calories | burgers
743 | cheeseburger
651 | hamburger
582 | veggie burger
"""
labels = self._varargs_as_labels(column_label_or_labels)
table = Table()
Expand Down Expand Up @@ -659,64 +663,84 @@ def drop(self, *column_label_or_labels):
return self.select([c for (i, c) in enumerate(self.labels) if i not in exclude and c not in exclude])

def where(self, column_or_label, value_or_predicate=None, other=None):
"""Return a Table of rows for which the column is ``value`` or a non-zero value.
If ``column_or_label`` contains Boolean values, returns rows corresponding to True.
"""
Return a new ``Table`` containing rows where ``value_or_predicate``
returns True for values in ``column_or_label``.
Args:
``column_or_label``: The header name of a column in the table or an array.
``column_or_label``: A column of the ``Table`` either as a label
(``str``) or an index (``int``). Can also be an array of booleans;
only the rows where the array value is ``True`` are kept.
``value_or_predicate``: If a function, it is applied to every value
in ``column_or_label``. Only the rows where ``value_or_predicate``
returns True are kept. If a single value, only the rows where the
values in ``column_or_label`` are equal to ``value_or_predicate``
are kept.
``value_or_predicate``: Value to compare to items in column
or function to apply to items in column.
``other``: Optional additional column label for
``value_or_predicate`` to make pairwise comparisons. See the
examples below for usage. When ``other`` is supplied,
``value_or_predicate`` must be a callable function.
Returns:
An instance of ``Table`` containing rows for which the ``column_or_label``
column or ``column_or_label`` itself is non-zero or True, or is equal to ``value``,
if provided.
If ``value_or_predicate`` is a function, returns a new ``Table``
containing only the rows where ``value_or_predicate(val)`` is True
for each ``val`` in ``column_or_label``.
If ``value_or_predicate`` is a value, returns a new ``Table``
containing only the rows where the values in ``column_or_label``
are equal to ``value_or_predicate``.
If ``column_or_label`` is an array of booleans, returns a new
``Table`` containing only the rows where ``column_or_label`` is
``True``.
>>> marbles = Table().with_columns(
... "Color", make_array("Red", "Green", "Blue", "Red", "Green", "Green"),
... "Shape", make_array("Round", "Rectangular", "Rectangular", "Round", "Rectangular", "Round"),
... "Amount", make_array(4, 6, 12, 7, 9, 2),
... "Price", make_array(1.30, 1.20, 2.00, 1.75, 0, 3.00))
>>> marbles = Table().with_columns([
... "Color", ["Red", "Green", "Blue", "Red", "Green", "Green"],
... "Shape", ["Round", "Rectangular", "Rectangular", "Round", "Rectangular", "Round"],
... "Amount", [4, 6, 12, 7, 9, 2],
... "Price", [1.30, 1.20, 2.00, 1.75, 1.40, 3.00]])
>>> marbles
Color | Shape | Amount | Price
Red | Round | 4 | 1.3
Green | Rectangular | 6 | 1.2
Blue | Rectangular | 12 | 2
Red | Round | 7 | 1.75
Green | Rectangular | 9 | 1.4
Green | Rectangular | 9 | 0
Green | Round | 2 | 3
>>> marbles.where("Shape", "Round")
Color | Shape | Amount | Price
Red | Round | 4 | 1.3
Red | Round | 7 | 1.75
Green | Round | 2 | 3
>>> marbles.where(marbles.column("Shape") == "Round") # equivalent to previous example
Use a value to select matching rows
>>> marbles.where("Price", 1.3)
Color | Shape | Amount | Price
Red | Round | 4 | 1.3
Red | Round | 7 | 1.75
Green | Round | 2 | 3
>>> marbles.where(marbles.column("Price") > 1.5)
Color | Shape | Amount | Price
Blue | Rectangular | 12 | 2
Red | Round | 7 | 1.75
Green | Round | 2 | 3
You can also use predicates to simplify single-column comparisons.
In general, a higher-order predicate function such as the functions in
``datascience.predicates.are`` can be used.
>>> from datascience.predicates import are
>>> marbles.where("Price", are.above(1.5)) # equivalent to previous example
>>> # equivalent to previous example
>>> marbles.where("Price", are.equal_to(1.3))
Color | Shape | Amount | Price
Red | Round | 4 | 1.3
>>> marbles.where("Price", are.above(1.5))
Color | Shape | Amount | Price
Blue | Rectangular | 12 | 2
Red | Round | 7 | 1.75
Green | Round | 2 | 3
And apply some predicates to compare columns.
Use the optional argument ``other`` to apply predicates to compare
columns.
>>> marbles.where("Price", are.above, "Amount")
Color | Shape | Amount | Price
Green | Round | 2 | 3
>>> marbles.where("Price", are.equal_to, "Amount") # empty table
Color | Shape | Amount | Price
"""
column = self._get_column(column_or_label)
if other is not None:
Expand Down

0 comments on commit df576ca

Please sign in to comment.