From e67ec80a7822f60aa7223662cf4e56dd3d0e3d6d Mon Sep 17 00:00:00 2001
From: Maxwell Weinstein <maxwelljweinstein@berkeley.edu>
Date: Sat, 15 Oct 2016 22:41:00 -0700
Subject: [PATCH 1/7] replaced all instances of list usage in docs with
 make_array

---
 datascience/tables.py | 161 ++++++++++++++++++++----------------------
 1 file changed, 78 insertions(+), 83 deletions(-)

diff --git a/datascience/tables.py b/datascience/tables.py
index efdad6321..f5c0df0d8 100644
--- a/datascience/tables.py
+++ b/datascience/tables.py
@@ -31,7 +31,7 @@ class Table(collections.abc.MutableMapping):
     def __init__(self, labels=None, _deprecated=None, *, formatter=_formats.default_formatter):
         """Create an empty table with column labels.
 
-        >>> tiles = Table(['letter', 'count', 'points'])
+        >>> tiles = Table(make_array('letter', 'count', 'points'))
         >>> tiles
         letter | count | points
 
@@ -324,10 +324,10 @@ def apply(self, fn, column_label=None):
             A numpy array consisting of results of applying ``fn`` to elements
             specified by ``column_label`` in each row.
 
-        >>> t = Table().with_columns([
-        ...     'letter', ['a', 'b', 'c', 'z'],
-        ...     'count',  [9, 3, 3, 1],
-        ...     'points', [1, 2, 2, 10]])
+        >>> t = Table().with_columns(
+        ...     'letter', make_array('a', 'b', 'c', 'z'),
+        ...     'count',  make_array(9, 3, 3, 1),
+        ...     'points', make_array(1, 2, 2, 10))
         >>> t
         letter | count | points
         a      | 9     | 1
@@ -403,8 +403,8 @@ def append_column(self, label, values):
         """Appends a column to the table or replaces a column.
 
         ``__setitem__`` is aliased to this method:
-            ``table.append_column('new_col', [1, 2, 3])`` is equivalent to
-            ``table['new_col'] = [1, 2, 3]``.
+            ``table.append_column('new_col', make_array(1, 2, 3))`` is equivalent to
+            ``table['new_col'] = make_array(1, 2, 3)``.
 
         Args:
             ``label`` (str): The label of the new column.
@@ -424,17 +424,17 @@ def append_column(self, label, values):
                 - ``values`` is a list/array and does not have the same length
                   as the number of rows in the table.
 
-        >>> table = Table().with_columns([
-        ...     'letter', ['a', 'b', 'c', 'z'],
-        ...     'count',  [9, 3, 3, 1],
-        ...     'points', [1, 2, 2, 10]])
+        >>> table = Table().with_columns(
+        ...     'letter', make_array('a', 'b', 'c', 'z'),
+        ...     'count',  make_array(9, 3, 3, 1),
+        ...     'points', make_array(1, 2, 2, 10))
         >>> table
         letter | count | points
         a      | 9     | 1
         b      | 3     | 2
         c      | 3     | 2
         z      | 1     | 10
-        >>> table.append_column('new_col1', [10, 20, 30, 40])
+        >>> table.append_column('new_col1', make_array(10, 20, 30, 40))
         >>> table
         letter | count | points | new_col1
         a      | 9     | 1      | 10
@@ -448,7 +448,7 @@ def append_column(self, label, values):
         b      | 3     | 2      | 20       | hello
         c      | 3     | 2      | 30       | hello
         z      | 1     | 10     | 40       | hello
-        >>> table.append_column(123, [1, 2, 3, 4])
+        >>> table.append_column(123, make_array(1, 2, 3, 4))
         Traceback (most recent call last):
             ...
         ValueError: The column label must be a string, but a int was given
@@ -495,30 +495,23 @@ def relabel(self, column_label, new_label):
         Returns:
             Original table with modified labels
 
-        >>> table = Table().with_columns([
+        >>> table = Table().with_columns(
         ...     'points', (1, 2, 3),
-        ...     'id',     (12345, 123, 5123)])
+        ...     'id',     (12345, 123, 5123))
         >>> table.relabel('id', 'yolo')
         points | yolo
         1      | 12345
         2      | 123
         3      | 5123
-        >>> table.relabel(['points', 'yolo'], ['red', 'blue'])
+        >>> table.relabel(make_array('points', 'yolo'), make_array('red', 'blue'))
         red  | blue
         1    | 12345
         2    | 123
         3    | 5123
-        >>> table.relabel(['red', 'green', 'blue'],
-        ...               ['cyan', 'magenta', 'yellow', 'key'])
+        >>> table.relabel(make_array('red', 'green', 'blue'), make_array('cyan', 'magenta', 'yellow', 'key'))
         Traceback (most recent call last):
             ...
-        ValueError: Invalid arguments. column_label and new_label must be of
-        equal length.
-        >>> table.relabel(['red', 'blue'], ['blue', 'red'])
-        blue | red
-        1    | 12345
-        2    | 123
-        3    | 5123
+        ValueError: Invalid arguments. column_label and new_label must be of equal length.
         """
         if isinstance(column_label, numbers.Integral):
             column_label = self._as_label(column_label)
@@ -635,10 +628,10 @@ def drop(self, *column_label_or_labels):
         Returns:
             An instance of ``Table`` with given columns removed.
 
-        >>> t = Table().with_columns([
-        ...     'burgers',  ['cheeseburger', 'hamburger', 'veggie burger'],
-        ...     'prices',   [6, 5, 5],
-        ...     'calories', [743, 651, 582]])
+        >>> t = Table().with_columns(
+        ...     'burgers',  make_array('cheeseburger', 'hamburger', 'veggie burger'),
+        ...     'prices',   make_array(6, 5, 5),
+        ...     'calories', make_array(743, 651, 582))
         >>> t
         burgers       | prices | calories
         cheeseburger  | 6      | 743
@@ -792,11 +785,11 @@ def sort(self, column_or_label, descending=False, distinct=False):
             An instance of ``Table`` containing rows sorted based on the values
             in ``column_or_label``.
 
-        >>> marbles = Table().with_columns([
-        ...    "Color", ["Red", "Green", "Blue", "Red", "Green", "Green"],
-        ...    "Shape", ["Round", "Rectangular", "Rectangular", "Round", "Rectangular", "Round"],
-        ...    "Amount", [4, 6, 12, 7, 9, 2],
-        ...    "Price", [1.30, 1.30, 2.00, 1.75, 1.40, 1.00]])
+        >>> marbles = Table().with_columns(
+        ...    "Color", make_array("Red", "Green", "Blue", "Red", "Green", "Green"),
+        ...    "Shape", make_array("Round", "Rectangular", "Rectangular", "Round", "Rectangular", "Round"),
+        ...    "Amount", make_array(4, 6, 12, 7, 9, 2),
+        ...    "Price", make_array(1.30, 1.30, 2.00, 1.75, 1.40, 1.00))
         >>> marbles
         Color | Shape       | Amount | Price
         Red   | Round       | 4      | 1.3
@@ -868,11 +861,11 @@ def group(self, column_or_label, collect=None):
             accept arguments with one of the column types, that column will be empty in the resulting
             table.
 
-        >>> marbles = Table().with_columns([
-        ...    "Color", ["Red", "Green", "Blue", "Red", "Green", "Green"],
-        ...    "Shape", ["Round", "Rectangular", "Rectangular", "Round", "Rectangular", "Round"],
-        ...    "Amount", [4, 6, 12, 7, 9, 2],
-        ...    "Price", [1.30, 1.30, 2.00, 1.75, 1.40, 1.00]])
+        >>> marbles = Table().with_columns(
+        ...    "Color", make_array("Red", "Green", "Blue", "Red", "Green", "Green"),
+        ...    "Shape", make_array("Round", "Rectangular", "Rectangular", "Round", "Rectangular", "Round"),
+        ...    "Amount", make_array(4, 6, 12, 7, 9, 2),
+        ...    "Price", make_array(1.30, 1.30, 2.00, 1.75, 1.40, 1.00))
         >>> marbles
         Color | Shape       | Amount | Price
         Red   | Round       | 4      | 1.3
@@ -949,11 +942,11 @@ def groups(self, labels, collect=None):
             accept arguments with one of the column types, that column will be empty in the resulting
             table.
 
-        >>> marbles = Table().with_columns([
-        ...    "Color", ["Red", "Green", "Blue", "Red", "Green", "Green"],
-        ...    "Shape", ["Round", "Rectangular", "Rectangular", "Round", "Rectangular", "Round"],
-        ...    "Amount", [4, 6, 12, 7, 9, 2],
-        ...    "Price", [1.30, 1.30, 2.00, 1.75, 1.40, 1.00]])
+        >>> marbles = Table().with_columns(
+        ...    "Color", make_array("Red", "Green", "Blue", "Red", "Green", "Green"),
+        ...    "Shape", make_array("Round", "Rectangular", "Rectangular", "Round", "Rectangular", "Round"),
+        ...    "Amount", make_array(4, 6, 12, 7, 9, 2),
+        ...    "Price", make_array(1.30, 1.30, 2.00, 1.75, 1.40, 1.00))
         >>> marbles
         Color | Shape       | Amount | Price
         Red   | Round       | 4      | 1.3
@@ -1184,9 +1177,9 @@ def percentile(self, p):
         pth percentile of a column is the smallest value that at at least as
         large as the p% of numbers in the column.
 
-        >>> table = Table().with_columns([
-        ...     'count',  [9, 3, 3, 1],
-        ...     'points', [1, 2, 2, 10]])
+        >>> table = Table().with_columns(
+        ...     'count',  make_array(9, 3, 3, 1),
+        ...     'points', make_array(1, 2, 2, 10))
         >>> table
         count | points
         9     | 1
@@ -1222,9 +1215,9 @@ def sample(self, k=None, with_replacement=True, weights=None):
         Returns:
             A new instance of ``Table``.
 
-        >>> jobs = Table().with_columns([
-        ...     'job',  ['a', 'b', 'c', 'd'],
-        ...     'wage', [10, 20, 15, 8]])
+        >>> jobs = Table().with_columns(
+        ...     'job',  make_array('a', 'b', 'c', 'd'),
+        ...     'wage', make_array(10, 20, 15, 8))
         >>> jobs
         job  | wage
         a    | 10
@@ -1248,7 +1241,7 @@ def sample(self, k=None, with_replacement=True, weights=None):
         b    | 20
         c    | 15
         >>> jobs.sample(k = 2, with_replacement = True,
-        ...     weights = [0.5, 0.5, 0, 0]) # doctest: +SKIP
+        ...     weights = make_array(0.5, 0.5, 0, 0)) # doctest: +SKIP
         job  | wage
         a    | 10
         a    | 10
@@ -1306,9 +1299,9 @@ def split(self, k):
         Returns:
             A tuple containing two instances of ``Table``.
 
-        >>> jobs = Table().with_columns([
-        ...     'job',  ['a', 'b', 'c', 'd'],
-        ...     'wage', [10, 20, 15, 8]])
+        >>> jobs = Table().with_columns(
+        ...     'job',  make_array('a', 'b', 'c', 'd'),
+        ...     'wage', make_array(10, 20, 15, 8))
         >>> jobs
         job  | wage
         a    | 10
@@ -1348,7 +1341,7 @@ def with_row(self, row):
         Raises:
             ``ValueError``: If the row length differs from the column count.
 
-        >>> tiles = Table(['letter', 'count', 'points'])
+        >>> tiles = Table(make_array('letter', 'count', 'points'))
         >>> tiles.with_row(['c', 2, 3]).with_row(['d', 4, 2])
         letter | count | points
         c      | 2     | 3
@@ -1369,8 +1362,9 @@ def with_rows(self, rows):
         Raises:
             ``ValueError``: If a row length differs from the column count.
 
-        >>> tiles = Table(['letter', 'count', 'points'])
-        >>> tiles.with_rows([['c', 2, 3], ['d', 4, 2]])
+        >>> tiles = Table(make_array('letter', 'count', 'points'))
+        >>> tiles.with_rows(make_array(make_array('c', 2, 3),
+        ...     make_array('d', 4, 2)))
         letter | count | points
         c      | 2     | 3
         d      | 4     | 2
@@ -1523,7 +1517,8 @@ def relabeled(self, label, new_label):
                 columns to be changed. Same number of elements as label.
 
         >>> tiles = Table(['letter', 'count'])
-        >>> tiles = tiles.with_rows([['c', 2], ['d', 4]])
+        >>> tiles = tiles.with_rows(
+        ...    make_array(make_array('c', 2), make_array('d', 4)))
         >>> tiles.relabeled('count', 'number')
         letter | number
         c      | 2
@@ -1677,9 +1672,9 @@ def to_csv(self, filename):
         Returns:
             None, outputs a file with name ``filename``.
 
-        >>> jobs = Table().with_columns([
-        ...     'job',  ['a', 'b', 'c', 'd'],
-        ...     'wage', [10, 20, 15, 8]])
+        >>> jobs = Table().with_columns(
+        ...     'job',  make_array('a', 'b', 'c', 'd'),
+        ...     'wage', make_array(10, 20, 15, 8))
         >>> jobs
         job  | wage
         a    | 10
@@ -1826,11 +1821,11 @@ def barh(self, column_for_categories=None, select=None, overlay=True, **vargs):
                 See http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.barh
                 for additional arguments that can be passed into vargs.
 
-        >>> t = Table().with_columns([
-        ...     'Furniture', ['chairs', 'tables', 'desks'],
-        ...     'Count', [6, 1, 2],
-        ...     'Price', [10, 20, 30]
-        ...     ])
+        >>> t = Table().with_columns(
+        ...     'Furniture', make_array('chairs', 'tables', 'desks'),
+        ...     'Count', make_array(6, 1, 2),
+        ...     'Price', make_array(10, 20, 30)
+        ...     )
         >>> t
         Furniture | Count | Price
         chairs    | 6     | 10
@@ -1908,10 +1903,10 @@ def scatter(self, column_for_x, select=None, overlay=True, fit_line=False,
 
             ``labels``: A column of text labels to annotate dots
 
-        >>> table = Table().with_columns([
-        ...     'x', [9, 3, 3, 1],
-        ...     'y', [1, 2, 2, 10],
-        ...     'z', [3, 4, 5, 6]])
+        >>> table = Table().with_columns(
+        ...     'x', make_array(9, 3, 3, 1),
+        ...     'y', make_array(1, 2, 2, 10),
+        ...     'z', make_array(3, 4, 5, 6))
         >>> table
         x    | y    | z
         9    | 1    | 3
@@ -2056,9 +2051,9 @@ def hist(self, select=None, overlay=True, bins=None, counts=None, unit=None, **v
                 include: `range`, `normed`, `cumulative`, and `orientation`,
                 to name a few.
 
-        >>> t = Table().with_columns([
-        ...     'count',  [9, 3, 3, 1],
-        ...     'points', [1, 2, 2, 10]])
+        >>> t = Table().with_columns(
+        ...     'count',  make_array(9, 3, 3, 1),
+        ...     'points', make_array(1, 2, 2, 10))
         >>> t
         count | points
         9     | 1
@@ -2069,9 +2064,9 @@ def hist(self, select=None, overlay=True, bins=None, counts=None, unit=None, **v
         <histogram of values in count>
         <histogram of values in points>
 
-        >>> t = Table().with_columns([
-        ...     'value',      [101, 102, 103],
-        ...     'proportion', [0.25, 0.5, 0.25]])
+        >>> t = Table().with_columns(
+        ...     'value',      make_array(101, 102, 103),
+        ...     'proportion', make_array(0.25, 0.5, 0.25))
         >>> t.hist(counts='value') # doctest: +SKIP
         <histogram of values in prop weighted by corresponding values in value>
         """
@@ -2171,9 +2166,9 @@ def boxplot(self, **vargs):
         Raises:
             ValueError: The Table contains columns with non-numerical values.
 
-        >>> table = Table().with_columns([
-        ...     'test1', [92.5, 88, 72, 71, 99, 100, 95, 83, 94, 93],
-        ...     'test2', [89, 84, 74, 66, 92, 99, 88, 81, 95, 94]])
+        >>> table = Table().with_columns(
+        ...     'test1', make_array(92.5, 88, 72, 71, 99, 100, 95, 83, 94, 93),
+        ...     'test2', make_array(89, 84, 74, 66, 92, 99, 88, 81, 95, 94))
         >>> table
         test1 | test2
         92.5  | 89
@@ -2446,9 +2441,9 @@ def __getitem__(self, row_indices_or_slice):
         Returns:
             A new instance of ``Table``.
 
-        >>> t = Table().with_columns([
-        ...     'letter grade', ['A+', 'A', 'A-', 'B+', 'B', 'B-'],
-        ...     'gpa', [4, 4, 3.7, 3.3, 3, 2.7]])
+        >>> t = Table().with_columns(
+        ...     'letter grade', make_array('A+', 'A', 'A-', 'B+', 'B', 'B-'),
+        ...     'gpa', make_array(4, 4, 3.7, 3.3, 3, 2.7))
         >>> t
         letter grade | gpa
         A+           | 4
@@ -2471,7 +2466,7 @@ def __getitem__(self, row_indices_or_slice):
         A-           | 3.7
         B+           | 3.3
         B            | 3
-        >>> t.exclude([1, 3, 4])
+        >>> t.exclude(make_array(1, 3, 4))
         letter grade | gpa
         A+           | 4
         A-           | 3.7

From 5feba1f1e842ea9aa6dcc7396f81622516c91323 Mon Sep 17 00:00:00 2001
From: Maxwell Weinstein <maxwelljweinstein@berkeley.edu>
Date: Sat, 15 Oct 2016 23:13:44 -0700
Subject: [PATCH 2/7] resolved merge conflicts

---
 datascience/tables.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/datascience/tables.py b/datascience/tables.py
index f5c0df0d8..77c679ba3 100644
--- a/datascience/tables.py
+++ b/datascience/tables.py
@@ -251,8 +251,11 @@ def column(self, index_or_label):
 
         >>> tiles = Table().with_columns(
         ...     'letter', make_array('c', 'd'),
+<<<<<<< e67ec80a7822f60aa7223662cf4e56dd3d0e3d6d
         ...     'count',  make_array(2, 4),
         ... )
+=======
+>>>>>>> resolved merge conflicts
         >>> tiles.column('letter')
         array(['c', 'd'],
               dtype='<U1')

From 4b078921e5ed9acd7115dd3f5a7483a1d59b5ab9 Mon Sep 17 00:00:00 2001
From: Maxwell Weinstein <maxwelljweinstein@berkeley.edu>
Date: Sat, 15 Oct 2016 23:21:28 -0700
Subject: [PATCH 3/7] fixed last failing test

---
 datascience/tables.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/datascience/tables.py b/datascience/tables.py
index 77c679ba3..a177c521d 100644
--- a/datascience/tables.py
+++ b/datascience/tables.py
@@ -251,11 +251,9 @@ def column(self, index_or_label):
 
         >>> tiles = Table().with_columns(
         ...     'letter', make_array('c', 'd'),
-<<<<<<< e67ec80a7822f60aa7223662cf4e56dd3d0e3d6d
         ...     'count',  make_array(2, 4),
         ... )
-=======
->>>>>>> resolved merge conflicts
+
         >>> tiles.column('letter')
         array(['c', 'd'],
               dtype='<U1')

From a244467c9528a5d136ad242d968d20cc6533765e Mon Sep 17 00:00:00 2001
From: Maxwell Weinstein <maxwelljweinstein@berkeley.edu>
Date: Sat, 22 Oct 2016 13:30:50 -0700
Subject: [PATCH 4/7] sample and pivot docs updated

---
 datascience/tables.py | 95 +++++++++++++++++++++++++++++++++----------
 1 file changed, 73 insertions(+), 22 deletions(-)

diff --git a/datascience/tables.py b/datascience/tables.py
index a177c521d..3d8b237a8 100644
--- a/datascience/tables.py
+++ b/datascience/tables.py
@@ -999,15 +999,58 @@ def groups(self, labels, collect=None):
             return grouped
 
     def pivot(self, columns, rows, values=None, collect=None, zero=None):
-        """Generate a table with a column for rows (or a column for each row
-        in rows list) and a column for each unique value in columns. Each row
-        counts/aggregates the values that match both row and column.
-
-        columns -- column label in self
-        rows -- column label or a list of column labels
-        values -- column label in self (or None to produce counts)
-        collect -- aggregation function over values
-        zero -- zero value for non-existent row-column combinations
+        """Generate a table with a column for each unique value in ``columns``,
+        with rows for each unique value in ``rows``. Each row counts/aggregates
+        the values that match both row and column based on ``collect``.
+
+        Args:
+            ``columns`` -- a single column label, (``str``), in self, used to
+                create new columns, based on its unique values in self.
+            ``rows`` -- row labels, as (``str``) or list of strings, used to
+                create new rows based on it's unique values.
+            ``values`` -- column label in self for use in aggregation.
+            ``collect`` -- aggregation function, used to group ``values``
+                over row-column combinations.
+            ``zero`` -- zero value for non-existent row-column combinations.
+
+        Returns:
+            New pivot table, with row-column combinations, as specified, with
+            aggregated ``values`` by ``collect`` across the intersection of
+            ``columns`` and ``rows``. Simple counts provided if values/collect
+            is None, as default.
+
+        >>> titanic = Table().with_columns('age', make_array(21, 44, 56, 89, 95
+        ...    , 40, 80, 45), 'survival', make_array(0,0,0,1, 1, 1, 0, 1),
+        ...    'gender',  make_array('M', 'M', 'M', 'M', 'F', 'F', 'F', 'F'),
+        ...    'prediction', make_array(0, 0, 1, 1, 0, 1, 0, 1))
+        >>> titanic
+        age  | survival | gender | prediction
+        21   | 0        | M      | 0
+        44   | 0        | M      | 0
+        56   | 0        | M      | 1
+        89   | 1        | M      | 1
+        95   | 1        | F      | 0
+        40   | 1        | F      | 1
+        80   | 0        | F      | 0
+        45   | 1        | F      | 1
+        >>> titanic.pivot('survival', 'gender')
+        gender | 0    | 1
+        F      | 1    | 3
+        M      | 3    | 1
+        >>> titanic.pivot('prediction', 'gender')
+        gender | 0    | 1
+        F      | 2    | 2
+        M      | 2    | 2
+        >>> titanic.pivot('survival', 'gender', values='age', collect = np.mean)
+        gender | 0       | 1
+        F      | 80      | 60
+        M      | 40.3333 | 89
+        >>> titanic.pivot('survival', make_array('prediction', 'gender'))
+        prediction | gender | 0    | 1
+        0          | F      | 1    | 1
+        0          | M      | 2    | 0
+        1          | F      | 0    | 2
+        1          | M      | 1    | 1
         """
         if collect is not None and values is None:
             raise TypeError('collect requires values to be specified')
@@ -1198,23 +1241,21 @@ def sample(self, k=None, with_replacement=True, weights=None):
         """Returns a new table where k rows are randomly sampled from the
         original table.
 
-        Kwargs:
-            k (int or None): If None (default), all the rows in the table are
-                sampled. If an integer, k rows from the original table are
-                sampled.
+        Args:
+            ``k`` -- specifies the number of rows (``int``)  to be sampled from
+                self. Default is k is equal to number of rows in self.
 
-            with_replacement (bool): If True (default), samples the rows with
-                replacement. If False, samples the rows without replacement.
+            ``with_replacement`` -- (``boolean``), if true samples ``k`` rows
+                with replacement from self, else samples ``k`` rows without
+                replacement.
 
-            weights (list/array or None): If None (default), samples the rows
-                using a uniform random distribution. If a list/array is passed
-                in, it must be the same length as the number of rows in the
-                table and the values must sum to 1. The rows will then be
-                sampled according the the probability distribution in
-                ``weights``.
+            ``weights``: Array specifying valid probability distribution.
+                Rows in self are sampled according the the
+                probability distribution given by ``weights``. Default is
+                uniform distribution on [1, ... , n], n = number of rows.
 
         Returns:
-            A new instance of ``Table``.
+            A new instance of ``Table`` with k rows resampled.
 
         >>> jobs = Table().with_columns(
         ...     'job',  make_array('a', 'b', 'c', 'd'),
@@ -1246,6 +1287,16 @@ def sample(self, k=None, with_replacement=True, weights=None):
         job  | wage
         a    | 10
         a    | 10
+        >>> jobs.sample(k = 2, weights = make_array(1, 0, 1, 0))
+        Traceback (most recent call last):
+            ...
+        ValueError: probabilities do not sum to 1
+
+        # Weights must be length of table.
+        >>> jobs.sample(k = 2, weights = make_array(1, 0, 0))
+        Traceback (most recent call last):
+            ...
+        ValueError: a and p must have same size
         """
         n = self.num_rows
         if k is None:

From 580e11016687088e1d61d3449034c787cc3b80e3 Mon Sep 17 00:00:00 2001
From: Maxwell Weinstein <maxwelljweinstein@berkeley.edu>
Date: Sat, 22 Oct 2016 15:04:29 -0700
Subject: [PATCH 5/7] fixed re:  comments

---
 datascience/tables.py | 41 +++++++++++++++++++++++++++++------------
 1 file changed, 29 insertions(+), 12 deletions(-)

diff --git a/datascience/tables.py b/datascience/tables.py
index 8b309f179..3c7c39e37 100644
--- a/datascience/tables.py
+++ b/datascience/tables.py
@@ -1009,15 +1009,19 @@ def pivot(self, columns, rows, values=None, collect=None, zero=None):
         the values that match both row and column based on ``collect``.
 
         Args:
-            ``columns`` -- a single column label, (``str``), in self, used to
-                create new columns, based on its unique values in self.
-            ``rows`` -- row labels, as (``str``) or list of strings, used to
+            ``columns`` -- a single column label, (``str``), in table, used to
+                create new columns, based on its unique values.
+            ``rows`` -- row labels, as (``str``) or array of strings, used to
                 create new rows based on it's unique values.
-            ``values`` -- column label in self for use in aggregation.
+            ``values`` -- column label in table for use in aggregation.
             ``collect`` -- aggregation function, used to group ``values``
                 over row-column combinations.
             ``zero`` -- zero value for non-existent row-column combinations.
 
+        Raises:
+            TypeError -- if collect is passed in and values is not,
+                and vice versa.
+
         Returns:
             New pivot table, with row-column combinations, as specified, with
             aggregated ``values`` by ``collect`` across the intersection of
@@ -1056,6 +1060,14 @@ def pivot(self, columns, rows, values=None, collect=None, zero=None):
         0          | M      | 2    | 0
         1          | F      | 0    | 2
         1          | M      | 1    | 1
+        >>> titanic.pivot('survival', 'gender', values = 'age')
+        Traceback (most recent call last):
+           ...
+        TypeError: values requires collect to be specified
+        >>> titanic.pivot('survival', 'gender', collect = np.mean)
+        Traceback (most recent call last):
+           ...
+        TypeError: collect requires values to be specified
         """
         if collect is not None and values is None:
             raise TypeError('collect requires values to be specified')
@@ -1248,19 +1260,24 @@ def sample(self, k=None, with_replacement=True, weights=None):
 
         Args:
             ``k`` -- specifies the number of rows (``int``)  to be sampled from
-                self. Default is k is equal to number of rows in self.
+               the table. Default is k is equal to number of rows in the table.
 
-            ``with_replacement`` -- (``boolean``), if true samples ``k`` rows
-                with replacement from self, else samples ``k`` rows without
-                replacement.
+            ``with_replacement`` -- (``bool``) By default, TRUE, Samples ``k``
+                rows with replacement from table, else samples ``k`` rows
+                without replacement.
 
-            ``weights``: Array specifying valid probability distribution.
-                Rows in self are sampled according the the
-                probability distribution given by ``weights``. Default is
+            ``weights`` -- Array specifying probability the ith row of the
+                table is sampled. If None, by default, ``weights`` is the
                 uniform distribution on [1, ... , n], n = number of rows.
+                ``weights`` must be a valid probability distribution -- i.e.
+                an array the length of the number of rows, summing to 1.
+
+        Raises:
+            ValueError -- if ``weights`` is not length equal to number of rows
+                in the table; or, if ``weights`` does not sum to 1.
 
         Returns:
-            A new instance of ``Table`` with k rows resampled.
+            A new instance of ``Table`` with ``k`` rows resampled.
 
         >>> jobs = Table().with_columns(
         ...     'job',  make_array('a', 'b', 'c', 'd'),

From 08bc2ee659a9bcee002ee7f3c17204c079848d97 Mon Sep 17 00:00:00 2001
From: Maxwell Weinstein <maxwelljweinstein@berkeley.edu>
Date: Sat, 22 Oct 2016 15:09:36 -0700
Subject: [PATCH 6/7] fixed re: errors

---
 datascience/tables.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/datascience/tables.py b/datascience/tables.py
index 3c7c39e37..fac2fd6c5 100644
--- a/datascience/tables.py
+++ b/datascience/tables.py
@@ -1014,19 +1014,20 @@ def pivot(self, columns, rows, values=None, collect=None, zero=None):
             ``rows`` -- row labels, as (``str``) or array of strings, used to
                 create new rows based on it's unique values.
             ``values`` -- column label in table for use in aggregation.
+                Default None.
             ``collect`` -- aggregation function, used to group ``values``
-                over row-column combinations.
+                over row-column combinations. Default None.
             ``zero`` -- zero value for non-existent row-column combinations.
 
         Raises:
-            TypeError -- if collect is passed in and values is not,
-                and vice versa.
+            TypeError -- if ``collect`` is passed in and ``values`` is not,
+                or vice versa.
 
         Returns:
             New pivot table, with row-column combinations, as specified, with
             aggregated ``values`` by ``collect`` across the intersection of
-            ``columns`` and ``rows``. Simple counts provided if values/collect
-            is None, as default.
+            ``columns`` and ``rows``. Simple counts provided if values and
+            collect are None, as default.
 
         >>> titanic = Table().with_columns('age', make_array(21, 44, 56, 89, 95
         ...    , 40, 80, 45), 'survival', make_array(0,0,0,1, 1, 1, 0, 1),
@@ -1259,10 +1260,10 @@ def sample(self, k=None, with_replacement=True, weights=None):
         original table.
 
         Args:
-            ``k`` -- specifies the number of rows (``int``)  to be sampled from
-               the table. Default is k is equal to number of rows in the table.
+            ``k`` -- specifies the number of rows (``int``) to be sampled from
+               the table. Default is k equal to number of rows in the table.
 
-            ``with_replacement`` -- (``bool``) By default, TRUE, Samples ``k``
+            ``with_replacement`` -- (``bool``) By default TRUE; Samples ``k``
                 rows with replacement from table, else samples ``k`` rows
                 without replacement.
 

From 6bd1776961bd6059c8c8d2a52ab2fe5be63600ad Mon Sep 17 00:00:00 2001
From: Maxwell Weinstein <maxwelljweinstein@berkeley.edu>
Date: Sun, 23 Oct 2016 09:01:21 -0700
Subject: [PATCH 7/7] fixed weights

---
 datascience/tables.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/datascience/tables.py b/datascience/tables.py
index fac2fd6c5..92214422e 100644
--- a/datascience/tables.py
+++ b/datascience/tables.py
@@ -1263,15 +1263,15 @@ def sample(self, k=None, with_replacement=True, weights=None):
             ``k`` -- specifies the number of rows (``int``) to be sampled from
                the table. Default is k equal to number of rows in the table.
 
-            ``with_replacement`` -- (``bool``) By default TRUE; Samples ``k``
+            ``with_replacement`` -- (``bool``) By default True; samples ``k``
                 rows with replacement from table, else samples ``k`` rows
                 without replacement.
 
             ``weights`` -- Array specifying probability the ith row of the
-                table is sampled. If None, by default, ``weights`` is the
-                uniform distribution on [1, ... , n], n = number of rows.
-                ``weights`` must be a valid probability distribution -- i.e.
-                an array the length of the number of rows, summing to 1.
+                table is sampled. Defaults to None, which samples each row
+                with equal probability. ``weights`` must be a valid probability
+                distribution -- i.e. an array the length of the number of rows,
+                summing to 1.
 
         Raises:
             ValueError -- if ``weights`` is not length equal to number of rows