-
Notifications
You must be signed in to change notification settings - Fork 32
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
69 changed files
with
940 additions
and
1,316 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -95,3 +95,6 @@ tests/env/dummy/type-tests-output2 | |
tests/cli/.code | ||
.dpp | ||
.coverage.* | ||
.code/ | ||
.vscode/ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
1.7.2 | ||
2.0.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,81 +1,17 @@ | ||
import functools | ||
import collections | ||
from dataflows import Flow, add_computed_field | ||
from datapackage_pipelines.wrapper import ingest | ||
from datapackage_pipelines.utilities.flow_utils import spew_flow | ||
|
||
from datapackage_pipelines.wrapper import ingest, spew | ||
from datapackage_pipelines.utilities.resource_matcher import ResourceMatcher | ||
|
||
parameters, datapackage, resource_iterator = ingest() | ||
def flow(parameters):
    """Build the Flow that runs ``add_computed_field`` for this step.

    ``parameters`` is a mapping; its ``fields`` entry (an empty list when
    absent) and its ``resources`` entry (``None`` when absent) are passed,
    in that order, to ``add_computed_field``.
    """
    computed_fields = parameters.get('fields', [])
    target_resources = parameters.get('resources')
    step = add_computed_field(computed_fields, target_resources)
    return Flow(step)
|
||
resources = ResourceMatcher(parameters.get('resources')) | ||
fields = parameters.get('fields', []) | ||
|
||
|
||
def modify_datapackage(datapackage_):
    """Append the computed fields to the schema of every matching resource.

    Each resource whose name is accepted by the module-level ``resources``
    matcher gets one new schema field per entry of the module-level
    ``fields`` list: named after the entry's ``target`` and typed by
    ``get_type``. The descriptor is modified in place and returned.
    """
    for descriptor in datapackage_.get('resources', []):
        if not resources.match(descriptor['name']):
            continue
        schema_fields = descriptor['schema']['fields']
        computed = []
        for spec in fields:
            computed.append({
                'name': spec['target'],
                'type': get_type(
                    schema_fields,
                    spec.get('source', []),
                    spec['operation'],
                ),
            })
        descriptor['schema']['fields'] += computed
    return datapackage_
|
||
|
||
def get_type(res_fields, operation_fields, operation):
    """Infer the schema type of a computed field.

    Args:
        res_fields: field descriptors (dicts with ``name`` and, optionally,
            ``type``) of the resource being processed.
        operation_fields: names of the source fields the operation reads.
        operation: name of the operation (e.g. ``'sum'``, ``'avg'``,
            ``'join'``, ``'format'``, ``'constant'``).

    Returns:
        The type name to declare for the computed field.
    """
    types = [
        f.get('type') for f in res_fields if f['name'] in operation_fields
    ]
    # An untyped source makes the result untyped, whatever the operation.
    if 'any' in types:
        return 'any'
    # Text-producing operations always yield strings.
    if operation in ('format', 'join'):
        return 'string'
    # Any 'number' source, or an average, yields a 'number'.
    if 'number' in types or operation == 'avg':
        return 'number'
    # Otherwise reuse the type of the first matching source field
    # (in resource-schema order).
    if types:
        return types[0]
    # No source fields at all (e.g. a constant): the type is unknown.
    return 'any'
|
||
|
||
def process_resource(rows):
    """Yield every row after adding the configured computed fields to it.

    For each entry of the module-level ``fields`` list, the non-None values
    of the entry's ``source`` columns are collected and handed, together
    with the entry's ``with`` parameter (``''`` when absent) and the row
    itself, to the aggregator registered under the entry's ``operation``.
    The result is stored in the row under the entry's ``target`` key.
    """
    for record in rows:
        for spec in fields:
            present = []
            for column in spec.get('source', []):
                if record.get(column) is not None:
                    present.append(record.get(column))
            extra = spec.get('with', '')
            aggregate = AGGREGATORS[spec['operation']].func
            record[spec['target']] = aggregate(present, extra, record)
        yield record
|
||
|
||
def process_resources(resource_iterator_):
    """Yield each resource, transformed only if its name is selected.

    Resources whose spec name is accepted by the module-level ``resources``
    matcher are wrapped by ``process_resource``; the others pass through
    untouched.
    """
    for res in resource_iterator_:
        if resources.match(res.spec['name']):
            yield process_resource(res)
        else:
            yield res
|
||
|
||
# An Aggregator wraps a callable ``func(values, fstr, row)`` where
#   values - list of the non-None source values collected from the row,
#   fstr   - the field's 'with' parameter (separator, template or constant),
#   row    - the complete row being processed.
Aggregator = collections.namedtuple('Aggregator', ['func'])


def _none_if_empty(reducer):
    """Wrap *reducer* so that an empty ``values`` list yields None.

    Without the guard, averaging, max/min and reduce-based multiplication
    raise (ZeroDivisionError, ValueError, TypeError respectively) when
    there are no values to aggregate.
    """
    def guarded(values, fstr, row):
        return reducer(values) if values else None
    return guarded


# Operation name -> Aggregator. 'sum' keeps returning 0 for empty input;
# the text operations ('constant', 'join', 'format') never fail on it.
AGGREGATORS = {
    'sum': Aggregator(lambda values, fstr, row: sum(values)),
    'avg': Aggregator(
        _none_if_empty(lambda values: sum(values) / len(values))),
    'max': Aggregator(_none_if_empty(max)),
    'min': Aggregator(_none_if_empty(min)),
    'multiply': Aggregator(
        _none_if_empty(
            lambda values: functools.reduce(lambda x, y: x * y, values))),
    'constant': Aggregator(lambda values, fstr, row: fstr),
    'join': Aggregator(
        lambda values, fstr, row: fstr.join([str(x) for x in values])),
    'format': Aggregator(lambda values, fstr, row: fstr.format(**row)),
}
|
||
|
||
spew(modify_datapackage(datapackage), process_resources(resource_iterator)) | ||
if __name__ == '__main__':
    # Executed as a script: ingest the incoming context, build the flow
    # from its parameters and spew the flow's output back out.
    with ingest() as context:
        pipeline = flow(context.parameters)
        spew_flow(pipeline, context)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,15 @@ | ||
from datapackage_pipelines.wrapper import ingest, spew | ||
import warnings | ||
|
||
# Read the step's input, then fold the step parameters into the datapackage
# descriptor; when no descriptor came in, the parameters become the descriptor.
parameters, datapackage, res_iter = ingest()
if datapackage is not None:
    datapackage.update(parameters)
else:
    datapackage = parameters
from datapackage_pipelines.wrapper import ingest | ||
from datapackage_pipelines.utilities.flow_utils import spew_flow | ||
|
||
spew(datapackage, res_iter) | ||
from datapackage_pipelines.lib.update_package import flow | ||
|
||
|
||
if __name__ == '__main__':
    # This script still runs (using the flow imported from update_package),
    # but warns that it is deprecated.
    deprecation_notice = (
        'add_metadata will be removed in the future, use "update_package" instead'
    )
    warnings.warn(deprecation_notice, DeprecationWarning)
    with ingest() as context:
        spew_flow(flow(context.parameters), context)
Oops, something went wrong.