Skip to content

Commit

Permalink
v0.0.67 introduce update_schema processor
Browse files Browse the repository at this point in the history
  • Loading branch information
akariv committed Jan 19, 2020
1 parent c525ef7 commit ad374b2
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 1 deletion.
18 changes: 18 additions & 0 deletions PROCESSORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,24 @@ You can use `update_resource` to rename a resource like so:
update_resource('current-name', name='new-name')
```

#### update_schema.py
Update schema properties for one or more resources in the package

```python
def update_schema(resources, **metadata):
pass
```

- `resources`
- A name of a resource to operate on
- A regular expression matching resource names
- A list of resource names
- `None` indicates operation should be done on all resources
- The index of the resource in the package
- `metadata` - Any allowed schema property (according to the [spec](https://frictionlessdata.io/specs/table-schema/#descriptor)) can be provided here.

You can use `update_schema` to add a `missingValues` property, change the primary key, etc.

#### set_primary_key.py
Updates the primary key for one or more resources in the package

Expand Down
2 changes: 1 addition & 1 deletion dataflows/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.0.66
0.0.67
5 changes: 5 additions & 0 deletions dataflows/base/schema_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,8 @@ def schema_validator(resource, iterator,
continue

yield row


# Attach drop / ignore / raise_exception as attributes of schema_validator,
# so they can be referenced as e.g. `schema_validator.drop`.
# NOTE(review): the three names are defined outside the visible part of this
# module — presumably the validation-error handlers; confirm.
schema_validator.drop = drop
schema_validator.ignore = ignore
schema_validator.raise_exception = raise_exception
1 change: 1 addition & 0 deletions dataflows/processors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@
from .unstream import unstream
from .update_package import update_package, add_metadata
from .update_resource import update_resource
from .update_schema import update_schema
21 changes: 21 additions & 0 deletions dataflows/processors/update_schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from dataflows import PackageWrapper
from dataflows.helpers.resource_matcher import ResourceMatcher


def update_schema(resources, **props):
    """Build a flow step that merges ``props`` into resource schemas.

    ``resources`` is passed unchanged to ``ResourceMatcher`` to decide which
    resources are affected. ``props`` are the schema properties written into
    each selected resource's ``schema`` descriptor (created if missing).

    Returns a generator function that takes a ``PackageWrapper``, yields the
    updated package first and then one item per resource.
    """

    def func(package: PackageWrapper):
        selector = ResourceMatcher(resources, package.pkg)

        # Patch the descriptor of every selected resource in place.
        for descriptor in package.pkg.descriptor['resources']:
            if not selector.match(descriptor['name']):
                continue
            schema = descriptor.setdefault('schema', {})
            schema.update(props)
        yield package.pkg

        # Selected resources are emitted through their `.it` attribute, the
        # rest are passed along untouched.
        # NOTE(review): `.it` is presumably the raw row iterator — confirm.
        for wrapped in iter(package):
            yield wrapped.it if selector.match(wrapped.res.name) else wrapped

    return func
16 changes: 16 additions & 0 deletions tests/test_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -763,6 +763,22 @@ def test_update_resource():
assert dp.descriptor['resources'][4]['source'] == 'thewild'


def test_update_schema():
    """A ``missingValues`` entry set via ``update_schema`` turns '-' into None."""
    from dataflows import Flow, printer, update_schema, validate

    rows = [['a', '-'], ['a', 0]]
    flow = Flow(
        rows,
        update_schema(-1, missingValues=['-']),
        validate(),
        printer()
    )
    results, dp, stats = flow.results()
    print(dp.descriptor)

    expected = [
        {'col0': 'a', 'col1': None},
        {'col0': 'a', 'col1': 0},
    ]
    assert results[0] == expected

def test_set_type_resources():
from dataflows import Flow, set_type, validate

Expand Down

0 comments on commit ad374b2

Please sign in to comment.