Skip to content

Commit

Permalink
[resources][m]: avoid duplication of original resources - fixes #40
Browse files Browse the repository at this point in the history
  • Loading branch information
zelima committed Mar 14, 2018
1 parent 374ce4b commit 38b2a2e
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 17 deletions.
9 changes: 7 additions & 2 deletions planner/nodes/basic_nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,19 @@ def __init__(self, available_artifacts, outputs):
def get_artifacts(self):
for artifact in self.available_artifacts:
if artifact.datahub_type == 'original' or artifact.datahub_type == 'source/tabular':
resource_name = artifact.resource_name
# Avoid duplication: if the resource name ends with '_original'
# and the type is 'original', it is identical to source/tabular
if resource_name.endswith('_original') and artifact.datahub_type == 'original':
continue
output = ProcessingArtifact(
artifact.datahub_type, artifact.resource_name,
[], [artifact],
[('assembler.update_resource',
{
'name': artifact.resource_name,
'name': resource_name,
'update': {
'name': artifact.resource_name,
'name': resource_name,
'datahub': {
'type': artifact.datahub_type
}
Expand Down
7 changes: 5 additions & 2 deletions planner/nodes/output_nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,13 @@ def get_artifacts(self):
random.randrange(1000), out_file))
datahub_type = 'derived/{}'.format(self.fmt)
resource_name = out_file.replace('.', '_')

# Exclude source/tabular: in the zip it is a duplicate of derived/csv
artifacts = [
a for a in self.available_artifacts if a.datahub_type != 'source/tabular'
]
output = ProcessingArtifact(
datahub_type, resource_name,
[], self.available_artifacts,
[], artifacts,
[('assembler.extract_readme', {}),
('assembler.remove_hash', {}),
('dump.to_zip', {
Expand Down
20 changes: 7 additions & 13 deletions planner/nodes/planner.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ def planner(datapackage_input, prefix, processing, outputs, allowed_types=None):

# Add types for all resources
resource_mapping = parameters.get('resource-mapping', {})
tabular_info = []
for descriptor in resource_info:
path = descriptor['path']
name = descriptor['name']
Expand Down Expand Up @@ -73,21 +72,14 @@ def planner(datapackage_input, prefix, processing, outputs, allowed_types=None):
del descriptor['schema']
descriptor['geojsonSchema'] = schema

if 'schema' in descriptor:
tabular_descriptor = deepcopy(descriptor)

tabular_descriptor['datahub'] = {
'type': 'source/tabular'
}
tabular_info.append(tabular_descriptor)

descriptor['path'] = os.path.join('archive', '{}.{}'.format(name, extension))
descriptor['datahub'] = {
'type': 'original'
}
descriptor['path'] = os.path.join('archive', '{}.{}'.format(name, extension))
descriptor['name'] += '_original'

resource_info.extend(tabular_info)
if 'schema' in descriptor:
descriptor['datahub'] = {
'type': 'source/tabular'
}

# Processing on resources
processed_resources = set(p['input'] for p in processing)
Expand All @@ -103,6 +95,8 @@ def planner(datapackage_input, prefix, processing, outputs, allowed_types=None):

for p in processing:
if p['input'] == ri['name']:
# keep original resource without processing steps for zip
ri['name'] += '_original'
ri_ = deepcopy(ri)
if 'tabulator' in p:
ri_.update(p['tabulator'])
Expand Down

0 comments on commit 38b2a2e

Please sign in to comment.