Skip to content

Commit

Permalink
Merge aa1c8a2 into 4762baf
Browse files Browse the repository at this point in the history
  • Loading branch information
papajohn committed Feb 7, 2016
2 parents 4762baf + aa1c8a2 commit ffce859
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 18 deletions.
22 changes: 17 additions & 5 deletions datascience/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -651,19 +651,29 @@ def groups(self, labels, collect=None):
grouped[_collected_label(collect, label)] = column
return grouped

def pivot(self, columns, rows, values, collect=len, zero=None):
def pivot(self, columns, rows, values=None, collect=None, zero=None):
"""Generate a table with a column for rows (or a column for each row
in rows list) and a column for each unique value in columns. Each row
aggregates over the values that match both row and column.
counts/aggregates the values that match both row and column.
columns, values -- column labels in self
columns -- column label in self
rows -- column label or a list of column labels
values -- column label in self (or None to produce counts)
collect -- aggregation function over values
zero -- zero value for non-existent row-column combinations
"""
if collect is not None and values is None:
raise TypeError('collect requires values to be specified')
if values is not None and collect is None:
raise TypeError('values requires collect to be specified')
rows = self._as_labels(rows)
selected = self.select([columns, values] + rows)
if values is None:
selected = self.select([columns] + rows)
else:
selected = self.select([columns, values] + rows)
grouped = selected.groups([columns] + rows, collect)
if values is None:
values = grouped.labels[-1]

# Generate existing combinations of values from columns in rows
rows_values = sorted(list(set(self.select(rows).rows)))
Expand All @@ -674,7 +684,7 @@ def pivot(self, columns, rows, values, collect=len, zero=None):
for label in sorted(by_columns):
tuples = [t[1:] for t in by_columns[label]] # Discard column value
column = _fill_with_zeros(rows_values, tuples, zero)
pivot = self._unused_label(str(label) + ' ' + values)
pivot = self._unused_label(str(label))
pivoted[pivot] = column
return pivoted

Expand Down Expand Up @@ -1810,6 +1820,8 @@ def _zero_on_type_error(column_fn):
"""Wrap a function on an np.ndarray to return 0 on a type error."""
if not column_fn:
return column_fn
if not callable(column_fn):
raise TypeError('column functions must be callable')
@functools.wraps(column_fn)
def wrapped(column):
try:
Expand Down
45 changes: 32 additions & 13 deletions tests/test_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,16 +371,34 @@ def test_join(t, u):
""")


def test_pivot(t):
def test_pivot_counts(t):
t = t.copy()
t.append(('e', 12, 1, 12))
t['early'] = t['letter'] < 'd'
t['exists'] = 1
test = t.pivot('points', 'early', 'exists')
test = t.pivot('points', 'early')
assert_equal(test, """
early | 1 exists | 2 exists | 10 exists
False | 1 | 0 | 1
True | 1 | 2 | 0
early | 1 | 2 | 10
False | 1 | 0 | 1
True | 1 | 2 | 0
""")


# Exercise pivot with an explicit values column and a collect function,
# first with sum and then with max plus a non-default zero fill value.
def test_pivot_values(t):
# Work on a copy: the row append and column assignments below are applied
# to the copy rather than to the object passed in as `t`.
t = t.copy()
t.append(('e', 12, 1, 12))
# 'early' is the result of comparing the 'letter' column against 'd'.
t['early'] = t['letter'] < 'd'
# 'exists' is set to 2 (not 1), presumably so that sum/max results are
# distinguishable from plain row counts -- TODO confirm intent.
t['exists'] = 2
# Aggregate 'exists' with sum for each ('early', 'points') combination.
# The expected table shows 0 where a combination has no rows.
summed = t.pivot('points', 'early', 'exists', sum)
assert_equal(summed, """
early | 1 | 2 | 10
False | 2 | 0 | 2
True | 2 | 4 | 0
""")
# Same pivot with max as the aggregate; -1 is passed positionally as
# pivot's `zero` argument, and the expected table shows -1 for the
# combinations that have no rows.
maxed = t.pivot('points', 'early', 'exists', max, -1)
assert_equal(maxed, """
early | 1 | 2 | 10
False | 2 | -1 | 2
True | 2 | 2 | -1
""")


Expand All @@ -390,11 +408,11 @@ def test_pivot_multiple_rows(t):
t['early'] = t['letter'] < 'd'
t['late'] = t['letter'] > 'c'
t['exists'] = 1
test = t.pivot('points', ['early', 'late'], 'exists')
test = t.pivot('points', ['early', 'late'], 'exists', sum)
assert_equal(test, """
early | late | 1 exists | 2 exists | 10 exists
False | True | 1 | 0 | 1
True | False | 1 | 2 | 0
early | late | 1 | 2 | 10
False | True | 1 | 0 | 1
True | False | 1 | 2 | 0
""")


Expand All @@ -405,11 +423,12 @@ def test_pivot_sum(t):
t['exists'] = 1
test = t.pivot('points', 'early', 'exists', sum)
assert_equal(test, """
early | 1 exists | 2 exists | 10 exists
False | 1 | 0 | 1
True | 1 | 2 | 0
early | 1 | 2 | 10
False | 1 | 0 | 1
True | 1 | 2 | 0
""")


def test_apply(t):
t = t.copy()
assert_array_equal(t.apply(lambda x, y: x * y, ['count', 'points']), np.array([9, 6, 6, 10]))
Expand Down

0 comments on commit ffce859

Please sign in to comment.