Merge branch 'master' of https://github.com/dsten/datascience

data-8 · Oct 31, 2016 · 6d99894 · 6d99894
2 parents abe1955 + a01e52f
commit 6d99894
Showing 1 changed file with 35 additions and 17 deletions.
diff --git a/datascience/tables.py b/datascience/tables.py
@@ -1009,20 +1009,25 @@ def pivot(self, columns, rows, values=None, collect=None, zero=None):
         the values that match both row and column based on ``collect``.
 
         Args:
-            ``columns`` -- a single column label, (``str``), in self, used to
-                create new columns, based on its unique values in self.
-            ``rows`` -- row labels, as (``str``) or list of strings, used to
+            ``columns`` -- a single column label, (``str``), in table, used to
+                create new columns, based on its unique values.
+            ``rows`` -- row labels, as (``str``) or array of strings, used to
                 create new rows based on its unique values.
-            ``values`` -- column label in self for use in aggregation.
+            ``values`` -- column label in table for use in aggregation.
+                Default None.
             ``collect`` -- aggregation function, used to group ``values``
-                over row-column combinations.
+                over row-column combinations. Default None.
             ``zero`` -- zero value for non-existent row-column combinations.
 
+        Raises:
+            TypeError -- if ``collect`` is passed in and ``values`` is not,
+                or vice versa.
+
         Returns:
             New pivot table, with row-column combinations, as specified, with
             aggregated ``values`` by ``collect`` across the intersection of
-            ``columns`` and ``rows``. Simple counts provided if values/collect
-            is None, as default.
+            ``columns`` and ``rows``. Simple counts provided if values and
+            collect are None, as default.
 
         >>> titanic = Table().with_columns('age', make_array(21, 44, 56, 89, 95
         ...    , 40, 80, 45), 'survival', make_array(0,0,0,1, 1, 1, 0, 1),
@@ -1056,6 +1061,14 @@ def pivot(self, columns, rows, values=None, collect=None, zero=None):
         0          | M      | 2    | 0
         1          | F      | 0    | 2
         1          | M      | 1    | 1
+        >>> titanic.pivot('survival', 'gender', values = 'age')
+        Traceback (most recent call last):
+           ...
+        TypeError: values requires collect to be specified
+        >>> titanic.pivot('survival', 'gender', collect = np.mean)
+        Traceback (most recent call last):
+           ...
+        TypeError: collect requires values to be specified
         """
         if collect is not None and values is None:
             raise TypeError('collect requires values to be specified')
@@ -1247,20 +1260,25 @@ def sample(self, k=None, with_replacement=True, weights=None):
         original table.
 
         Args:
-            ``k`` -- specifies the number of rows (``int``)  to be sampled from
-                self. Default is k is equal to number of rows in self.
+            ``k`` -- specifies the number of rows (``int``) to be sampled from
+                the table. Defaults to the number of rows in the table.
+
+            ``with_replacement`` -- (``bool``) If True (the default), samples
+                ``k`` rows with replacement from the table; otherwise samples
+                ``k`` rows without replacement.
 
-            ``with_replacement`` -- (``boolean``), if true samples ``k`` rows
-                with replacement from self, else samples ``k`` rows without
-                replacement.
+            ``weights`` -- Array specifying the probability that the ith row
+                of the table is sampled. Defaults to None, which samples each
+                row with equal probability. ``weights`` must be a valid
+                probability distribution -- i.e. an array whose length equals
+                the number of rows and whose entries sum to 1.
 
-            ``weights``: Array specifying valid probability distribution.
-                Rows in self are sampled according the the
-                probability distribution given by ``weights``. Default is
-                uniform distribution on [1, ... , n], n = number of rows.
+        Raises:
+            ValueError -- if the length of ``weights`` differs from the number
+                of rows in the table, or if ``weights`` does not sum to 1.
 
         Returns:
-            A new instance of ``Table`` with k rows resampled.
+            A new instance of ``Table`` with ``k`` rows resampled.
 
         >>> jobs = Table().with_columns(
         ...     'job',  make_array('a', 'b', 'c', 'd'),