Skip to content

Commit

Permalink
v0.2.17 Allow sort_rows to receive a callable as well
Browse files Browse the repository at this point in the history
  • Loading branch information
akariv committed May 31, 2021
1 parent bdbaa58 commit 139b251
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 19 deletions.
4 changes: 3 additions & 1 deletion PROCESSORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -618,7 +618,9 @@ def sort_rows(key, resources=None, reverse=False):
pass
```

- `key` - String, which would be interpreted as a Python format string used to form the key (e.g. `{<field_name_1>}:{field_name_2}`)
- `key` - either:
  - a string, interpreted as a Python format string used to form the key (e.g. `{<field_name_1>}:{<field_name_2>}`)
  - a callable, which receives a row and returns a string to be used as the sorting key
- `resources`
- A name of a resource to operate on
- A regular expression matching resource names
Expand Down
2 changes: 1 addition & 1 deletion dataflows/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.2.16
0.2.17
44 changes: 27 additions & 17 deletions dataflows/processors/sort_rows.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,25 +7,35 @@

class KeyCalc(object):
    """Callable that turns a row into its sorting key.

    ``key_spec`` may be either:

    * a callable, which receives a row and returns the key; it is used as-is;
    * a string, interpreted as a Python format string (for example
      ``'{a}:{b}'``) that is formatted with the row's fields.
    """

    def __init__(self, key_spec):
        # Resolve the key specification once so that per-row calls are a
        # plain function call.
        self.calculator = self.__calculator(key_spec)

    def __calculator(self, key_spec):
        """Return a ``row -> key`` function for ``key_spec``.

        Raises:
            TypeError: if ``key_spec`` is neither a callable nor a string.
        """
        if callable(key_spec):
            return key_spec
        if not isinstance(key_spec, str):
            # Raise explicitly instead of using ``assert``: assertions are
            # removed under ``python -O``, which would silently leave the
            # calculator unset and fail much later on the first row.
            raise TypeError(
                'key should be either a format string or a row->string callable'
            )

        # Names appearing between braces in the format string; parsed once.
        key_fields = frozenset(re.findall(r'\{(.*?)\}', key_spec))

        def func(row):
            context = row.copy()
            for key, value in row.items():
                if key not in key_fields:
                    continue
                # We need to stringify some types to make them properly
                # comparable.
                # numbers
                # https://www.h-schmidt.net/FloatConverter/IEEE754.html
                if isinstance(value, (int, float, decimal.Decimal)):
                    bits = BitArray(float=value, length=64)
                    # invert the sign bit
                    bits.invert(0)
                    # invert negative numbers
                    if value < 0:
                        bits.invert(range(1, 64))
                    context[key] = bits.hex
            return key_spec.format(**context)

        return func

    def __call__(self, row):
        """Return the sorting key for ``row``."""
        return self.calculator(row)


def _sorter(rows, key_calc, reverse, batch_size):
Expand Down
21 changes: 21 additions & 0 deletions tests/test_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,27 @@ def test_sort_rows():
]


def test_sort_rows_callable():
    """sort_rows accepts a callable key: rows come out ordered by b, then a."""
    from dataflows import sort_rows

    source_rows = [
        {'a': 1, 'b': 3},
        {'a': 2, 'b': 3},
        {'a': 3, 'b': 1},
        {'a': 4, 'b': 1},
    ]

    def by_b_then_a(r):
        # Zero-padded so that string comparison matches numeric order.
        return '%04d|%04d' % (r['b'], r['a'])

    flow = Flow(source_rows, sort_rows(key=by_b_then_a))
    results, _, _ = flow.results()

    expected = [
        {'a': 3, 'b': 1},
        {'a': 4, 'b': 1},
        {'a': 1, 'b': 3},
        {'a': 2, 'b': 3},
    ]
    assert list(results[0]) == expected


def test_sort_reverse_many_rows():
from dataflows import sort_rows

Expand Down

0 comments on commit 139b251

Please sign in to comment.