Skip to content

Commit

Permalink
Add datapackage_reports_path option.
Browse files Browse the repository at this point in the history
`datapackage_reports_path` defines the path used to write the `path`
property of a report in the datapackage.json. This may be different from
the `reports_path`, which is where reports are actually written. If
`datapackage_reports_path` is missing, no `reports` property is added to
the datapackage.
  • Loading branch information
brew committed Sep 21, 2017
1 parent 845a1af commit 13f2d16
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 11 deletions.
8 changes: 6 additions & 2 deletions README.md
Expand Up @@ -28,18 +28,20 @@ Add the following to the pipeline-spec.yml configuration to validate each resour
- run: goodtables.validate
parameters:
fail_on_error: True,
reports_path: 'reports',
reports_path: 'path/to/datapackage/reports', # where reports will be written
datapackage_reports_path: 'reports', # relative to datapackage.json
write_report: True,
goodtables:
<key>: <value> # options passed to goodtables.validate()
```

- `fail_on_error`: An optional boolean to determine whether the pipeline should fail on validation error (default `True`).
- `reports_path`: An optional string to define where Goodtables reports should be written (default is `reports`).
- `datapackage_reports_path`: An optional string to define the path to the report, relative to the datapackage.json (see note below).
- `write_report`: An optional boolean to determine whether a goodtables validation report should be written to `reports_path` (default is `True`).
- `goodtables`: An optional object passed to `goodtables.validate()` to customise its behaviour. See [`goodtables.validate()`](https://github.com/frictionlessdata/goodtables-py/#validatesource-options) for available options.

If reports are written, a `reports` property will be added to the datapackage, detailing the path to the report for each resource:
If reports are written, and `datapackage_reports_path` is defined, a `reports` property will be added to the datapackage, detailing the path to the report for each resource:

```json
...
Expand All @@ -51,3 +53,5 @@ If reports are written, a `reports` property will be added to the datapackage, d
}
]
```

It is recommended to set `datapackage_reports_path` to a path, relative to the datapackage.json file, that points to where the reports were written. `datapackage_reports_path` does not define where the reports will be written, but helps ensure that a correct path is recorded in the `reports` property in datapackage.json. This is useful when the pipeline concludes with a `dump_to.path` processor.
8 changes: 5 additions & 3 deletions datapackage_pipelines_goodtables/processors/validate.py
Expand Up @@ -16,6 +16,7 @@
write_report = parameters.get('write_report', True)
goodtables_options = parameters.get('goodtables', {})
reports_path = parameters.get('reports_path', 'reports')
datapackage_reports_path = parameters.get('datapackage_reports_path', None)


def process_resources(res_iter_, datapackage, goodtables_options):
Expand Down Expand Up @@ -57,14 +58,15 @@ def _validate_resource(res, dp_res):
yield _validate_resource(res, dp_res)


# add report info to datapackage
if write_report:
# Add report info to datapackage
if write_report and datapackage_reports_path is not None:
reports = datapackage.get('reports', [])
for dp_res in datapackage['resources']:
reports.append({
'resource': dp_res['name'],
'reportType': 'goodtables',
'path': '{}/{}.json'.format(reports_path, dp_res['name'])
'path': '{}/{}.json'.format(datapackage_reports_path,
dp_res['name'])
})
datapackage['reports'] = reports

Expand Down
16 changes: 10 additions & 6 deletions tests/test_validate.py
Expand Up @@ -50,7 +50,9 @@ def test_validate_processor_no_resources_no_params(self):
'project': 'my-project',
'resources': []
}
params = {}
params = {
'datapackage_reports_path': 'reports'
}

# Path to the processor we want to test
processor_dir = os.path.dirname(
Expand Down Expand Up @@ -79,7 +81,8 @@ def test_validate_processor_no_resources_with_params(self):
'resources': []
}
params = {
'fail_on_error': True
'fail_on_error': True,
'datapackage_reports_path': 'reports'
}

# Path to the processor we want to test
Expand Down Expand Up @@ -120,7 +123,8 @@ def test_validate_processor_valid_resource(self):
report_dir = '{}'.format(self.get_base_path())

params = {
'reports_path': report_dir
'reports_path': report_dir,
'datapackage_reports_path': 'reports',
}

def row_yielder():
Expand Down Expand Up @@ -150,7 +154,7 @@ def row_yielder():
assert len(reports) == 1
assert reports[0]['resource'] == 'my-resource'
assert reports[0]['reportType'] == 'goodtables'
assert reports[0]['path'] == '{}/my-resource.json'.format(report_dir)
assert reports[0]['path'] == 'reports/my-resource.json'

with io.open('{}/my-resource.json'.format(report_dir), 'r') as f:
report_json = json.loads(f.read())
Expand Down Expand Up @@ -336,6 +340,7 @@ def test_validate_processor_invalid_resource_dont_fail(self):

params = {
'reports_path': report_dir,
'datapackage_reports_path': 'reports',
'fail_on_error': False
}

Expand Down Expand Up @@ -366,8 +371,7 @@ def row_yielder():
assert len(reports) == 1
assert reports[0]['resource'] == 'resource-dont-fail-write'
assert reports[0]['reportType'] == 'goodtables'
assert reports[0]['path'] == \
'{}/resource-dont-fail-write.json'.format(report_dir)
assert reports[0]['path'] == 'reports/resource-dont-fail-write.json'

with io.open('{}/resource-dont-fail-write.json'.format(report_dir),
'r') as f:
Expand Down

0 comments on commit 13f2d16

Please sign in to comment.