Skip to content

Commit

Permalink
added optional formatters for with_column, with_columns, append_column
Browse files Browse the repository at this point in the history
  • Loading branch information
adnanhemani committed Jan 17, 2017
1 parent 7c6a05c commit ccb5f61
Show file tree
Hide file tree
Showing 2 changed files with 128 additions and 24 deletions.
60 changes: 36 additions & 24 deletions datascience/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,7 @@ def append(self, row_or_table):
self._num_rows += n
return self

def append_column(self, label, values):
def append_column(self, label, values, formatter=None):
"""Appends a column to the table or replaces a column.
``__setitem__`` is aliased to this method:
Expand All @@ -424,6 +424,7 @@ def append_column(self, label, values):
If a list or array, the new column contains the values in
``values``, which must be the same length as the table.
``formatter`` (single formatter): Adds a formatter to the column being appended. No formatter added by default.
Returns:
Original table with new or replaced column
Expand Down Expand Up @@ -487,6 +488,8 @@ def append_column(self, label, values):
self._num_rows = len(values)

self._columns[label] = values
if (formatter != None):
self.set_format(label, formatter)

def relabel(self, column_label, new_label):
"""Changes the label(s) of column(s) specified by ``column_label`` to
Expand Down Expand Up @@ -1522,7 +1525,7 @@ def with_rows(self, rows):
self.append(self._with_columns(zip(*rows)))
return self

def with_column(self, label, values):
def with_column(self, label, values, formatter=None):
"""Return a new table with an additional or replaced column.
Args:
Expand All @@ -1533,6 +1536,8 @@ def with_column(self, label, values):
value in the new column is ``values``. If sequence of values,
new column takes on values in ``values``.
``formatter`` (single value): Specifies formatter for the new column. Defaults to no formatter.
Raises:
``ValueError``: If
- ``label`` is not a valid column name
Expand Down Expand Up @@ -1571,30 +1576,30 @@ def with_column(self, label, values):
ValueError: Column length mismatch. New column does not have the same number of rows as table.
"""
new_table = self.copy()
new_table.append_column(label, values)
new_table.append_column(label, values, formatter)
return new_table

def with_columns(self, *labels_and_values):
def with_columns(self, *labels_and_values_and_formats):
"""Return a table with additional or replaced columns.
Args:
``labels_and_values``: An alternating list of labels and values or
a list of label-value pairs. If one of the labels is in
``labels_and_values_and_formats``: An alternating list of labels, values, and (optionally) formatters or
a list of label-value pairs and/or label-value-formatter trios. If one of the labels is in
existing table, then every value in the corresponding column is
set to that value. If label has only a single value (``int``),
every row of corresponding column takes on that value.
Raises:
``ValueError``: If
- any label in ``labels_and_values`` is not a valid column
- any label in ``labels_and_values_and_formats`` is not a valid column
name, i.e if label is not of type (str).
- if any value in ``labels_and_values`` is a list/array and
- if any value in ``labels_and_values_and_formats`` is a list/array and
does not have the same length as the number of rows in the
table.
``AssertionError``:
- 'incorrect columns format', if passed more than one sequence
(iterables) for ``labels_and_values``.
(iterables) for ``labels_and_values_and_formats``.
- 'incorrect format' if a label is not followed by a value
(i.e. a label-value pair is incomplete).
Expand Down Expand Up @@ -1632,23 +1637,30 @@ def with_columns(self, *labels_and_values):
...
ValueError: Column length mismatch. New column does not have the same number of rows as table.
"""
if len(labels_and_values) == 1:
labels_and_values = labels_and_values[0]
if isinstance(labels_and_values, collections.abc.Mapping):
labels_and_values = list(labels_and_values.items())
if not isinstance(labels_and_values, collections.abc.Sequence):
labels_and_values = list(labels_and_values)
if not labels_and_values:
if len(labels_and_values_and_formats) == 1:
labels_and_values_and_formats = labels_and_values_and_formats[0]
if isinstance(labels_and_values_and_formats, collections.abc.Mapping):
labels_and_values_and_formats = list(labels_and_values_and_formats.items())
if not isinstance(labels_and_values_and_formats, collections.abc.Sequence):
labels_and_values_and_formats = list(labels_and_values_and_formats)
if not labels_and_values_and_formats:
return self
first = labels_and_values[0]
first = labels_and_values_and_formats[0]
if not isinstance(first, str) and hasattr(first, '__iter__'):
for pair in labels_and_values:
assert len(pair) == 2, 'incorrect columns format'
labels_and_values = [x for pair in labels_and_values for x in pair]
assert len(labels_and_values) % 2 == 0, 'Even length sequence required'
for i in range(0, len(labels_and_values), 2):
label, values = labels_and_values[i], labels_and_values[i+1]
self = self.with_column(label, values)
for cluster in labels_and_values_and_formats:
assert len(cluster) == 2 or len(cluster) == 3, 'incorrect columns format'
labels_and_values_and_formats = [x for pair in labels_and_values_and_formats for x in pair]
i = 0
while i < len(labels_and_values_and_formats):
assert (i+1) < len(labels_and_values_and_formats), "incorrect format"
if i+2 < len(labels_and_values_and_formats) and not isinstance(labels_and_values_and_formats[i+2], str):
label, values, formatter = labels_and_values_and_formats[i], labels_and_values_and_formats[i+1], labels_and_values_and_formats[i+2]
i += 3
else:
label, values, formatter = labels_and_values_and_formats[i], labels_and_values_and_formats[i+1], None
i += 2
self = self.with_column(label, values, formatter)

return self

def relabeled(self, label, new_label):
Expand Down
92 changes: 92 additions & 0 deletions tests/test_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,27 @@ def test_append_column(table):
with(pytest.raises(ValueError)):
table.append_column(0, [1, 2, 3, 4])

def test_append_column_with_formatter(table):
    """Check that ``append_column`` applies an optional formatter.

    The first call passes ``CurrencyFormatter`` as the third argument and
    expects the new column to render as currency. The second call omits the
    formatter and expects the scalar value to appear unformatted on every row.

    ``table`` is presumably a pytest fixture defined elsewhere in this test
    module; the expected output below shows it starting with the columns
    ``letter``, ``count`` and ``points`` and four rows.
    """
    column_1 = [10, 20, 30, 40]
    column_2 = 'hello'

    # append_column mutates ``table`` in place, so the same object is
    # re-checked after each call.
    table.append_column('new_col1', column_1, CurrencyFormatter)
    assert_equal(table, """
    letter | count | points | new_col1
    a | 9 | 1 | $10.00
    b | 3 | 2 | $20.00
    c | 3 | 2 | $30.00
    z | 1 | 10 | $40.00
    """)

    # No formatter argument: the earlier column keeps its currency rendering
    # and the new column is shown as-is.
    table.append_column('new_col2', column_2)
    assert_equal(table, """
    letter | count | points | new_col1 | new_col2
    a | 9 | 1 | $10.00 | hello
    b | 3 | 2 | $20.00 | hello
    c | 3 | 2 | $30.00 | hello
    z | 1 | 10 | $40.00 | hello
    """)

def test_with_column(table):
column_1 = [10, 20, 30, 40]
column_2 = 'hello'
Expand Down Expand Up @@ -587,6 +608,77 @@ def test_with_column(table):
table.append_column('bad_col', [1, 2])
with(pytest.raises(ValueError)):
table.append_column(0, [1, 2, 3, 4])
def test_with_column_with_formatter(table):
    """Check that ``with_column`` accepts an optional formatter and does not
    modify the table it is called on.

    Three tables are compared against their expected rendering: the original
    (unchanged), the one with a currency-formatted column, and the one with a
    further unformatted column added on top.
    """
    amounts = [10, 20, 30, 40]
    with_currency = table.with_column('new_col1', amounts, CurrencyFormatter)
    with_both = with_currency.with_column('new_col2', 'hello')

    # The source table must still have only its initial three columns.
    assert_equal(table, """
    letter | count | points
    a | 9 | 1
    b | 3 | 2
    c | 3 | 2
    z | 1 | 10
    """)

    # The formatter passed as third argument is applied to the new column.
    assert_equal(with_currency, """
    letter | count | points | new_col1
    a | 9 | 1 | $10.00
    b | 3 | 2 | $20.00
    c | 3 | 2 | $30.00
    z | 1 | 10 | $40.00
    """)

    # Adding a column without a formatter keeps the earlier formatting.
    assert_equal(with_both, """
    letter | count | points | new_col1 | new_col2
    a | 9 | 1 | $10.00 | hello
    b | 3 | 2 | $20.00 | hello
    c | 3 | 2 | $30.00 | hello
    z | 1 | 10 | $40.00 | hello
    """)

def test_with_columns():
    """Check ``with_columns`` with alternating label/value arguments.

    Covers three calls in sequence: building a table from arrays, adding
    columns from scalar values, and replacing an existing column while adding
    a new one.
    """
    # Build a two-column table from arrays.
    roster = Table().with_columns(
        'player_id', make_array(110234, 110235),
        'wOBA', make_array(.354, .236),
    )
    assert_equal(roster, """
    player_id | wOBA
    110234 | 0.354
    110235 | 0.236
    """)

    # Scalar values are expected to fill every row of their column.
    roster = roster.with_columns('salaries', 'N/A', 'season', 2016)
    assert_equal(roster, """
    player_id | wOBA | salaries | season
    110234 | 0.354 | N/A | 2016
    110235 | 0.236 | N/A | 2016
    """)

    # 'salaries' already exists, so it is replaced; 'years' is appended.
    salary_table = Table().with_column(
        'salary', make_array('$500,000', '$15,500,000'))
    salary_values = salary_table.column('salary')
    roster = roster.with_columns(
        'salaries', salary_values,
        'years', make_array(6, 1),
    )
    assert_equal(roster, """
    player_id | wOBA | salaries | season | years
    110234 | 0.354 | $500,000 | 2016 | 6
    110235 | 0.236 | $15,500,000 | 2016 | 1
    """)

def test_with_columns_with_formats():
    """Check ``with_columns`` when a formatter follows a label/value pair.

    The first two calls use plain label/value arguments. The third inserts
    ``CurrencyFormatter`` after the 'salaries' values and expects that column
    to render as currency. The last call puts an array in the formatter
    position and expects an exception.
    """
    roster = Table().with_columns(
        'player_id', make_array(110234, 110235),
        'wOBA', make_array(.354, .236),
    )
    assert_equal(roster, """
    player_id | wOBA
    110234 | 0.354
    110235 | 0.236
    """)

    roster = roster.with_columns('salaries', 'N/A', 'season', 2016)
    assert_equal(roster, """
    player_id | wOBA | salaries | season
    110234 | 0.354 | N/A | 2016
    110235 | 0.236 | N/A | 2016
    """)

    salary_values = Table().with_column(
        'salary', make_array(500000, 15500000)).column('salary')

    # Label, values, formatter, then a plain label/value pair.
    formatted_roster = roster.with_columns(
        'salaries', salary_values, CurrencyFormatter,
        'years', make_array(6, 1),
    )
    assert_equal(formatted_roster, """
    player_id | wOBA | salaries | season | years
    110234 | 0.354 | $500,000.00 | 2016 | 6
    110235 | 0.236 | $15,500,000.00 | 2016 | 1
    """)

    # An array where a formatter would go is not a valid formatter; the call
    # is expected to fail. The exception type is deliberately left broad, as
    # in the original test.
    with pytest.raises(Exception):
        roster.with_columns(
            'salaries', salary_values, make_array(7, 2),
            'years', make_array(6, 1),
        )


def test_append_table(table):
Expand Down

0 comments on commit ccb5f61

Please sign in to comment.