Skip to content

Commit

Permalink
0.0.9 Add validate processor
Browse files Browse the repository at this point in the history
  • Loading branch information
akariv committed Sep 13, 2018
1 parent e57bbf8 commit a76f030
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 5 deletions.
13 changes: 11 additions & 2 deletions PROCESSORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ DataFlows comes with a few built-in processors which do most of the heavy liftin
- **delete_fields** - Removes some columns from the data
- **add_computed_field** - Adds new fields whose values are based on existing columns
- **find_replace** - Look for specific patterns in specific fields and replace them with new data
- **set_type** - Parse incoming data based on provided schema, validate the data in the process
- **set_type** - Modify schema, parse incoming data based on new schema, validate the data in the process
- **validate** - Parse incoming data based on existing schema, validate the incoming data in the process

### Manipulate the entire resource
- **sort_rows** - Sort incoming data based on key
Expand Down Expand Up @@ -243,7 +244,7 @@ Sets a field's data type and type options and validates its data based on its ne
This processor modifies the last resource in the package.

```python
def set_Type(name, **options):
def set_type(name, **options):
pass
```

Expand All @@ -254,6 +255,14 @@ def set_Type(name, **options):
etc.
(more info on possible options can be found in the [tableschema spec](https://frictionlessdata.io/specs/table-schema/))

#### validate.py
Validate incoming data based on existing type definitions.

```python
def validate():
pass
```

### Manipulate the entire resource
#### sort_rows.py
Sort incoming data based on key.
Expand Down
2 changes: 1 addition & 1 deletion dataflows/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.0.8
0.0.9
1 change: 1 addition & 0 deletions dataflows/processors/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from .load import load
from .printer import printer
from .set_type import set_type
from .validate import validate
from .dumpers import dump_to_path, dump_to_zip

from .add_computed_field import add_computed_field
Expand Down
10 changes: 10 additions & 0 deletions dataflows/processors/validate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from .. import DataStreamProcessor, schema_validator


class validate(DataStreamProcessor):
    """Processor that validates incoming rows against the existing schema.

    Takes no options: it does not modify the schema, it only routes each
    resource's rows through ``schema_validator`` before handing them on.
    """

    def __init__(self):
        super().__init__()

    def process_resource(self, res):
        """Yield the rows of ``res`` after wrapping them in the validator.

        ``res.res`` is passed as the first argument to ``schema_validator``
        and the row iterator ``res`` itself as the second.
        NOTE(review): ``res.res`` is presumably the underlying resource
        object carrying the schema -- confirm against ``schema_validator``.
        """
        checked_rows = schema_validator(res.res, res)
        # Delegate to the base implementation so any default per-row
        # handling it performs still applies to the validated stream.
        yield from super().process_resource(checked_rows)
5 changes: 3 additions & 2 deletions dataflows/templates/main.tpl.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from dataflows import Flow, load, dump_to_path, dump_to_zip, printer, add_metadata
from dataflows import sort_rows, filter_rows, find_replace, delete_fields, set_type, unpivot
from dataflows import sort_rows, filter_rows, find_replace, delete_fields, set_type, validate, unpivot


{% if 'custom' in processing %}
Expand Down Expand Up @@ -43,8 +43,9 @@ def {{slug}}():
delete_fields(['field_name']), # Pass a list of field names to delete from the data
{% endif %}
{% if 'set_type' in processing %}
set_type('field_name', type='number', constraints=dict(minimum=3)), # There are quite a few options you can user here
set_type('field_name', type='number', constraints=dict(minimum=3)), # There are quite a few options you can use here
# Take a look at https://frictionlessdata.io/specs/table-schema/
# Or you can simply use validate() here instead
{% endif %}
{% if 'unpivot' in processing %}
unpivot(unpivot_fields, extra_keys, extra_value), # See documentation on the meaning of each of these parameters
Expand Down
22 changes: 22 additions & 0 deletions tests/test_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,28 @@ def filter_pythagorean_triplets(rows):
)
_ = f.process()

def test_validate():
    """Check that ``validate()`` rejects rows that no longer match the schema.

    The field ``a`` is declared as an integer, then a row step rewrites it
    into a string such as ``'0.5'``; processing the flow is expected to
    raise ``ValueError``.
    """
    from dataflows import Flow, validate, set_type, printer

    def adder(row):
        # Mutates the row in place: add 0.5, then store the result as text
        # so the value no longer conforms to the declared integer type.
        row['a'] = str(row['a'] + 0.5)

    source_rows = (dict(a=x) for x in range(10))
    flow = Flow(
        source_rows,
        set_type('a', type='integer'),
        adder,
        validate(),
        printer(),
    )

    try:
        flow.process()
    except ValueError:
        # Expected outcome: validation failed on the corrupted values.
        pass
    else:
        # Reaching this point means no validation error was raised.
        assert False


def test_example_7():
from dataflows import Flow, load, dump_to_path

Expand Down

0 comments on commit a76f030

Please sign in to comment.