From 2edb71d06e2ba15cbc03c37efb18bd2adb7c3f0b Mon Sep 17 00:00:00 2001 From: Stefan van der Walt Date: Sat, 10 Oct 2015 02:03:01 -0700 Subject: [PATCH 1/6] Allow take to handle slicing --- datascience/tables.py | 110 +++++++++++++++++++++++------------------- tests/test_tables.py | 17 +++++++ 2 files changed, 78 insertions(+), 49 deletions(-) diff --git a/datascience/tables.py b/datascience/tables.py index 3a2486142..418da1781 100644 --- a/datascience/tables.py +++ b/datascience/tables.py @@ -22,6 +22,65 @@ import datascience.util as _util + +class _Taker: + def __init__(self, table): + self._table = table + + def __call__(self, row_numbers): + """Return a Table of a sequence of rows taken by number. + + Args: + ``row_numbers`` (integer or list of integers): The list of row numbers to + be selected. + + Returns: + A ``Table`` containing only the selected rows. + + >>> print(t) + letter grade | gpa + A+ | 4 + A | 4 + A- | 3.7 + B+ | 3.3 + B | 3 + B- | 2.7 + >>> print(t.take(0)) + letter grade | gpa + A+ | 4 + >>> print(t.take(5)) + letter grade | gpa + B- | 2.7 + >>> print(t.take(-1)) + letter grade | gpa + B- | 2.7 + >>> print(t.take([2,1,0])) + letter grade | gpa + A- | 3.7 + A | 4 + A+ | 4 + >>> print(t.take([1,5])) + letter grade | gpa + A | 4 + B- | 2.7 + >>> print(t.take(range(3))) + letter grade | gpa + A+ | 4 + A | 4 + A- | 3.7 + """ + columns = [np.take(column, row_numbers, axis=0) for column in self._table.columns] + return self._table._with_columns(columns) + + def __getitem__(self, i): + rows = self._table.rows[i] + cols = self._table._columns.keys() + if not isinstance(rows, list): + rows = [rows] + + return Table.from_rows(rows, cols) + + class Table(collections.abc.MutableMapping): """A sequence of labeled columns.""" @@ -82,6 +141,8 @@ def __init__(self, columns=None, labels=None, for column, label in zip(columns, labels): self[label] = column + self.take = _Taker(self) + @classmethod def empty(cls, column_labels=None): """Create an empty table. Column labels are optional @@ -506,55 +567,6 @@ def drop(self, column_label_or_labels): """Return a Table with only columns other than selected label or labels.""" exclude = _as_labels(column_label_or_labels) return self.select([c for c in self.column_labels if c not in exclude]) - - def take(self, row_numbers): - """Return a Table of a sequence of rows taken by number. - - Args: - ``row_numbers`` (integer or list of integers): The list of row numbers to - be selected. - - Returns: - A ``Table`` containing only the selected rows. - - >>> grade = ['A+', 'A', 'A-', 'B+', 'B', 'B-'] - >>> gpa = [4, 4, 3.7, 3.3, 3, 2.7] - >>> t = Table([grade, gpa], ['letter grade', 'gpa']) - >>> t - letter grade | gpa - A+ | 4 - A | 4 - A- | 3.7 - B+ | 3.3 - B | 3 - B- | 2.7 - >>> t.take(0) - letter grade | gpa - A+ | 4 - >>> t.take(5) - letter grade | gpa - B- | 2.7 - >>> t.take(-1) - letter grade | gpa - B- | 2.7 - >>> t.take([2,1,0]) - letter grade | gpa - A- | 3.7 - A | 4 - A+ | 4 - >>> t.take([1,5]) - letter grade | gpa - A | 4 - B- | 2.7 - >>> t.take(range(3)) - letter grade | gpa - A+ | 4 - A | 4 - A- | 3.7 - """ - columns = [np.take(column, row_numbers, axis=0) for column in self.columns] - return self._with_columns(columns) - def where(self, column_or_label, value=None): """Return a Table of rows for which the column is value or a non-zero value.""" column = self._get_column(column_or_label) diff --git a/tests/test_tables.py b/tests/test_tables.py index 650bfb617..653cb9ace 100644 --- a/tests/test_tables.py +++ b/tests/test_tables.py @@ -118,6 +118,23 @@ def test_take(t): """) +def test_take_slice(t): + test = t.take[1:3] + assert_equal(test, """ + letter | count | points + b | 3 | 2 + c | 3 | 2 + """) + + +def test_take_slice_single(t): + test = t.take[1] + assert_equal(test, """ + letter | count | points + b | 3 | 2 + """) + + def test_stats(t): test = t.stats() assert_equal(test, """ From 44ee8868bf49d338feed371241c048681e39887d Mon Sep 17 00:00:00 2001 From: Stefan van der Walt Date: Mon, 12 Oct 2015 15:30:37 -0700 Subject: [PATCH 2/6] Expand take[] to also accept iterables. take() is now simply an alias to take[] --- datascience/tables.py | 37 ++++++++++++++++++++++--------------- tests/test_tables.py | 9 +++++++++ 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/datascience/tables.py b/datascience/tables.py index 418da1781..c3e7ab412 100644 --- a/datascience/tables.py +++ b/datascience/tables.py @@ -31,8 +31,8 @@ def __call__(self, row_numbers): """Return a Table of a sequence of rows taken by number. Args: - ``row_numbers`` (integer or list of integers): The list of row numbers to - be selected. + ``row_numbers`` (slice or integer or list of integers): + The list of row numbers to be selected. Returns: A ``Table`` containing only the selected rows. @@ -45,40 +45,47 @@ def __call__(self, row_numbers): B+ | 3.3 B | 3 B- | 2.7 - >>> print(t.take(0)) + >>> print(t.take[0]) letter grade | gpa A+ | 4 - >>> print(t.take(5)) + >>> print(t.take[5]) letter grade | gpa B- | 2.7 - >>> print(t.take(-1)) + >>> print(t.take[-1]) letter grade | gpa B- | 2.7 - >>> print(t.take([2,1,0])) + >>> print(t.take[2, 1, 0]) letter grade | gpa A- | 3.7 A | 4 A+ | 4 - >>> print(t.take([1,5])) + >>> print(t.take[1, 5]) letter grade | gpa A | 4 B- | 2.7 - >>> print(t.take(range(3))) + >>> print(t.take[:3]) letter grade | gpa A+ | 4 A | 4 A- | 3.7 """ - columns = [np.take(column, row_numbers, axis=0) for column in self._table.columns] - return self._table._with_columns(columns) + return self[row_numbers] def __getitem__(self, i): - rows = self._table.rows[i] - cols = self._table._columns.keys() - if not isinstance(rows, list): - rows = [rows] + if isinstance(i, collections.Iterable): + columns = [np.take(column, i, axis=0) + for column in self._table.columns] + return self._table._with_columns(columns) + elif isinstance(i, slice): + columns = [column[i] for column in self._table._columns.values()] + return self._table._with_columns(columns) + else: + rows = self._table.rows[i] + cols = self._table._columns.keys() + if not isinstance(rows, list): + rows = [rows] - return Table.from_rows(rows, cols) + return Table.from_rows(rows, cols) class Table(collections.abc.MutableMapping): diff --git a/tests/test_tables.py b/tests/test_tables.py index 653cb9ace..c94385992 100644 --- a/tests/test_tables.py +++ b/tests/test_tables.py @@ -135,6 +135,15 @@ def test_take_slice_single(t): """) +def test_take_iterable(t): + test = t.take[0, 2] + assert_equal(test, """ + letter | count | points + a | 9 | 1 + c | 3 | 2 + """) + + def test_stats(t): test = t.stats() assert_equal(test, """ From a3ecf5d8528ac322f9ee61fd8350ab743bfc3fd6 Mon Sep 17 00:00:00 2001 From: Stefan van der Walt Date: Mon, 12 Oct 2015 17:08:55 -0700 Subject: [PATCH 3/6] Sphinx: correctly document `take` --- datascience/tables.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/datascience/tables.py b/datascience/tables.py index c3e7ab412..e39a90a09 100644 --- a/datascience/tables.py +++ b/datascience/tables.py @@ -27,12 +27,12 @@ class _Taker: def __init__(self, table): self._table = table - def __call__(self, row_numbers): + def __call__(self, row_numbers_or_slice): """Return a Table of a sequence of rows taken by number. Args: - ``row_numbers`` (slice or integer or list of integers): - The list of row numbers to be selected. + ``row_numbers_or_slice`` (slice or integer or list of integers): + The list of row numbers or a slice to be selected. Returns: A ``Table`` containing only the selected rows. @@ -69,7 +69,7 @@ def __call__(self, row_numbers): A | 4 A- | 3.7 """ - return self[row_numbers] + return self[row_numbers_or_slice] def __getitem__(self, i): if isinstance(i, collections.Iterable): @@ -150,6 +150,10 @@ def __init__(self, columns=None, labels=None, self.take = _Taker(self) + # This, along with a snippet below, is necessary for Sphinx to + # correctly load the `take` docstring + take = _Taker(None) + @classmethod def empty(cls, column_labels=None): """Create an empty table. Column labels are optional @@ -1620,6 +1624,10 @@ def __repr__(self): return '{0}({1})'.format(type(self).__name__, repr(self._table)) +# For Sphinx: grab the docstring from `Taker.__call__` +Table.take.__doc__ = _Taker.__call__.__doc__ + + class Q: """Query manager for Tables.""" array = None From 5929d573cef7abdde8fcf447abda26ef7614984a Mon Sep 17 00:00:00 2001 From: Stefan van der Walt Date: Tue, 20 Oct 2015 11:49:54 -0700 Subject: [PATCH 4/6] Update documentation to use old and new syntax --- datascience/tables.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/datascience/tables.py b/datascience/tables.py index e39a90a09..c15074211 100644 --- a/datascience/tables.py +++ b/datascience/tables.py @@ -37,7 +37,7 @@ def __call__(self, row_numbers_or_slice): Returns: A ``Table`` containing only the selected rows. - >>> print(t) + >>> t letter grade | gpa A+ | 4 A | 4 @@ -45,29 +45,44 @@ def __call__(self, row_numbers_or_slice): B+ | 3.3 B | 3 B- | 2.7 - >>> print(t.take[0]) + >>> t.take(0) letter grade | gpa A+ | 4 - >>> print(t.take[5]) + >>> t.take(5) letter grade | gpa B- | 2.7 - >>> print(t.take[-1]) + >>> t.take(-1) letter grade | gpa B- | 2.7 - >>> print(t.take[2, 1, 0]) + >>> t.take([2, 1, 0]) letter grade | gpa A- | 3.7 A | 4 A+ | 4 - >>> print(t.take[1, 5]) + >>> print(t.take([1, 5])) letter grade | gpa A | 4 B- | 2.7 - >>> print(t.take[:3]) + >>> t.take(range(3)) + letter grade | gpa + A+ | 4 + A | 4 + A- | 3.7 + + Note that ``take`` also supports NumPy-like indexing and slicing: + + >>> t.take[:3] letter grade | gpa A+ | 4 A | 4 A- | 3.7 + + >>> t.take[2, 1, 0] + letter grade | gpa + A- | 3.7 + A | 4 + A+ | 4 + """ return self[row_numbers_or_slice] From 2badfffcb1c8af533823743a1d1b9b4fd9f7974b Mon Sep 17 00:00:00 2001 From: Stefan van der Walt Date: Tue, 20 Oct 2015 11:57:47 -0700 Subject: [PATCH 5/6] Access ._columns instead of .columns to avoid immediate unpacking into tuple --- datascience/tables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datascience/tables.py b/datascience/tables.py index c15074211..9ea49f6d0 100644 --- a/datascience/tables.py +++ b/datascience/tables.py @@ -89,7 +89,7 @@ def __call__(self, row_numbers_or_slice): def __getitem__(self, i): if isinstance(i, collections.Iterable): columns = [np.take(column, i, axis=0) - for column in self._table.columns] + for column in self._table._columns.values()] return self._table._with_columns(columns) elif isinstance(i, slice): columns = [column[i] for column in self._table._columns.values()] From bd4ff840c24ad037e74012a0df0c2093b567e8fe Mon Sep 17 00:00:00 2001 From: Stefan van der Walt Date: Tue, 20 Oct 2015 11:59:24 -0700 Subject: [PATCH 6/6] Fix doctest --- datascience/tables.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/datascience/tables.py b/datascience/tables.py index 9ea49f6d0..2921deaf6 100644 --- a/datascience/tables.py +++ b/datascience/tables.py @@ -37,6 +37,10 @@ def __call__(self, row_numbers_or_slice): Returns: A ``Table`` containing only the selected rows. + >>> grade = ['A+', 'A', 'A-', 'B+', 'B', 'B-'] + >>> gpa = [4, 4, 3.7, 3.3, 3, 2.7] + >>> t = Table([grade, gpa], ['letter grade', 'gpa']) + >>> t letter grade | gpa A+ | 4