Skip to content

Commit

Permalink
apply defaults to data package descriptor
Browse files Browse the repository at this point in the history
  • Loading branch information
roll committed Apr 6, 2017
1 parent 592c99e commit 10c9287
Show file tree
Hide file tree
Showing 5 changed files with 167 additions and 25 deletions.
25 changes: 25 additions & 0 deletions datapackage/config.py
@@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals


# Module API

# Profile written into a package descriptor that does not declare one.
DEFAULT_PACKAGE_PROFILE = 'data-package'

# Profile and encoding written into a resource that does not declare them.
DEFAULT_RESOURCE_PROFILE = 'data-resource'
DEFAULT_RESOURCE_ENCODING = 'utf-8'

# Defaults for the fields and missing-value markers of a tabular schema.
DEFAULT_FIELD_TYPE = 'string'
DEFAULT_FIELD_FORMAT = 'default'
DEFAULT_MISSING_VALUES = ['']

# Defaults merged into the dialect of a tabular resource.
DEFAULT_DIALECT = {
    'delimiter': ',',
    'doubleQuote': True,
    'lineTerminator': '\r\n',
    # The quote character is a single double-quote; the previous value was
    # a two-character string ('""'), which is not a valid quote character.
    'quoteChar': '"',
    'escapeChar': '\\',
    'skipInitialSpace': True,
    'header': True,
    'caseSensitiveHeader': False,
}
25 changes: 23 additions & 2 deletions datapackage/datapackage.py
Expand Up @@ -15,6 +15,7 @@
import warnings
import jsonpointer
import datapackage.schema
from . import config
from . import helpers
from .resource import Resource
from .exceptions import (
Expand Down Expand Up @@ -61,6 +62,7 @@ def __init__(self, descriptor=None, schema='data-package', default_base_path=Non
self._base_path = self._get_base_path(descriptor, default_base_path)
self._descriptor = self._load_descriptor(descriptor)
self._dereference_descriptor(self._descriptor)
self._apply_defaults(self._descriptor)

self._schema = self._load_schema(schema)
self._resources = self._load_resources(self.descriptor,
Expand Down Expand Up @@ -386,8 +388,6 @@ def _remove_tempdir_if_exists(self):

def _dereference_descriptor(self, descriptor):
PROPERTIES = ['schema', 'dialect']

# For every resource
for property in PROPERTIES:
for resource in descriptor.get('resources', []):
value = resource.get(property)
Expand Down Expand Up @@ -431,3 +431,24 @@ def _dereference_descriptor(self, descriptor):
raise DataPackageException(
'Not resolved Local URI "%s" '
'for resource.%s' % (value, property))

def _apply_defaults(self, descriptor):
    """Fill unset descriptor properties with the defaults from ``config``.

    The descriptor is updated in place and values that are already present
    are never overwritten. Every resource receives a default ``profile``
    and ``encoding``. Resources whose profile is ``tabular-data-resource``
    additionally get field ``type``/``format`` and ``missingValues``
    defaults on an existing ``schema``, and the default dialect options on
    an existing ``dialect``. A missing ``schema`` or ``dialect`` is not
    created.

    Args:
        descriptor (dict): package descriptor to complete.
    """
    descriptor.setdefault('profile', config.DEFAULT_PACKAGE_PROFILE)
    for resource in descriptor.get('resources', []):
        resource.setdefault('profile', config.DEFAULT_RESOURCE_PROFILE)
        resource.setdefault('encoding', config.DEFAULT_RESOURCE_ENCODING)
        if resource['profile'] == 'tabular-data-resource':

            # Schema
            schema = resource.get('schema')
            if schema is not None:
                for field in schema.get('fields', []):
                    field.setdefault('type', config.DEFAULT_FIELD_TYPE)
                    field.setdefault('format', config.DEFAULT_FIELD_FORMAT)
                # Store a copy: handing out the module-level list itself
                # would let an edit to one schema's missingValues leak into
                # config and into every other schema sharing that list.
                if 'missingValues' not in schema:
                    schema['missingValues'] = list(
                        config.DEFAULT_MISSING_VALUES)

            # Dialect
            dialect = resource.get('dialect')
            if dialect is not None:
                for key, value in config.DEFAULT_DIALECT.items():
                    dialect.setdefault(key, value)
11 changes: 11 additions & 0 deletions tests/conftest.py
Expand Up @@ -4,10 +4,14 @@
from __future__ import print_function
from __future__ import unicode_literals

import mock
import pytest
import tempfile
from datapackage import DataPackage


# Fixtures

@pytest.yield_fixture()
def tmpfile():
with tempfile.NamedTemporaryFile() as f:
Expand All @@ -24,3 +28,10 @@ def txt_tmpfile():
def csv_tmpfile():
    """Yield an open named temporary file whose name ends in ``.csv``.

    The file is closed when the generator is resumed or closed.
    """
    handle = tempfile.NamedTemporaryFile(suffix='.csv')
    with handle:
        yield handle


@pytest.yield_fixture()
def NoDefaultsDataPackage():
    """Provide a ``DataPackage`` subclass with ``_apply_defaults`` mocked out.

    Tests that assert on the exact descriptor contents use this class so
    the descriptor is not extended with default values.
    """
    defaults_stub = mock.Mock()

    class NoDefaultsDataPackage(DataPackage):
        # Overrides the real method; calling it does nothing to the descriptor.
        _apply_defaults = defaults_stub

    yield NoDefaultsDataPackage
128 changes: 106 additions & 22 deletions tests/test_datapackage.py
Expand Up @@ -5,6 +5,7 @@
from __future__ import unicode_literals

import os
import sys
import glob
import json
import mock
Expand All @@ -26,14 +27,14 @@ def test_init_uses_base_schema_by_default(self):

def test_init_accepts_dicts(self):
    """A plain dict is accepted as the descriptor and exposed unchanged."""
    source = {'foo': 'bar', 'profile': 'data-package'}
    package = datapackage.DataPackage(source)
    assert package.descriptor == source

def test_init_accepts_filelike_object(self):
descriptor = {
'foo': 'bar',
'profile': 'data-package',
}
filelike_descriptor = six.StringIO(json.dumps(descriptor))
dp = datapackage.DataPackage(filelike_descriptor)
Expand All @@ -42,7 +43,9 @@ def test_init_accepts_filelike_object(self):
def test_init_accepts_file_paths(self):
    """Loading a descriptor from a file path yields it plus the defaults.

    The stale ``assert dp.descriptor == {}`` is dropped: it contradicted
    the assertion below, which expects the default profile to be present.
    """
    path = test_helpers.fixture_path('empty_datapackage.json')
    dp = datapackage.DataPackage(path)
    assert dp.descriptor == {
        'profile': 'data-package',
    }

def test_init_raises_if_file_path_doesnt_exist(self):
path = 'this-file-doesnt-exist.json'
Expand Down Expand Up @@ -77,12 +80,12 @@ def test_init_raises_if_filelike_object_isnt_a_json(self):
@httpretty.activate
def test_init_accepts_urls(self):
    """A descriptor can be loaded from a URL served as JSON.

    The overwritten ``body`` assignment and the old ``{'foo': 'bar'}``
    assertion are removed; they contradicted the mocked response.
    """
    url = 'http://someplace.com/datapackage.json'
    body = '{"profile": "data-package"}'
    httpretty.register_uri(httpretty.GET, url, body=body,
                           content_type='application/json')

    dp = datapackage.DataPackage(url)
    assert dp.descriptor == {'profile': 'data-package'}

@httpretty.activate
def test_init_raises_if_url_doesnt_exist(self):
Expand Down Expand Up @@ -144,36 +147,38 @@ def test_attributes(self):
descriptor = {
'name': 'test',
'title': 'a test',
'profile': 'data-package',
}
schema = {
'properties': {
'name': {}
}
}
dp = datapackage.DataPackage(descriptor, schema)
assert sorted(dp.attributes) == sorted(['name', 'title'])
assert sorted(dp.attributes) == sorted(['name', 'title', 'profile'])

def test_attributes_can_be_set(self):
    """Keys added to ``dp.descriptor`` show up in ``dp.to_dict()``.

    The leftover ``'name'`` key and the old assertion are removed; with
    both descriptor variants merged, neither assertion could pass.
    """
    descriptor = {
        'profile': 'data-package',
    }
    dp = datapackage.DataPackage(descriptor)
    dp.descriptor['title'] = 'bar'
    assert dp.to_dict() == {'profile': 'data-package', 'title': 'bar'}

def test_attributes_arent_immutable(self):
    """Mutating a nested descriptor value is reflected by ``to_dict()``.

    The old assertion without ``'profile'`` is removed; it could not hold
    for a descriptor that declares a profile.
    """
    descriptor = {
        'profile': 'data-package',
        'keywords': [],
    }
    dp = datapackage.DataPackage(descriptor)
    dp.descriptor['keywords'].append('foo')
    assert dp.to_dict() == {'profile': 'data-package', 'keywords': ['foo']}

def test_attributes_return_defaults_id_descriptor_is_empty(self):
    """An empty descriptor still reports the defaulted ``profile`` attribute.

    The superseded ``def`` line (which had no body) and the old
    ``== ()`` assertion are removed.
    """
    descriptor = {}
    schema = {}
    dp = datapackage.DataPackage(descriptor, schema)
    assert dp.attributes == ('profile',)

def test_validate(self):
descriptor = {
Expand Down Expand Up @@ -235,11 +240,15 @@ def test_required_attributes_return_empty_tuple_if_nothings_required(self):
assert dp.required_attributes == ()

def test_to_dict_value_can_be_altered_without_changing_the_dp(self):
    """Editing the dict returned by ``to_dict()`` leaves the package as is.

    The dead ``descriptor = {}`` assignment and the old ``== {}``
    assertion are removed; the descriptor under test declares a profile.
    """
    descriptor = {
        'profile': 'data-package',
    }
    dp = datapackage.DataPackage(descriptor)
    dp_dict = dp.to_dict()
    dp_dict['foo'] = 'bar'
    assert dp.descriptor == {
        'profile': 'data-package',
    }

def test_to_json(self):
descriptor = {
Expand All @@ -248,14 +257,14 @@ def test_to_json(self):
dp = datapackage.DataPackage(descriptor)
assert json.loads(dp.to_json()) == descriptor

def test_descriptor_dereferencing_uri(self, NoDefaultsDataPackage):
    """Schema and dialect references in the fixture file are resolved inline.

    Uses the ``NoDefaultsDataPackage`` fixture so the expected resources
    can be compared without default values. The superseded ``def`` line
    and the old ``datapackage.DataPackage`` call are removed.
    """
    dp = NoDefaultsDataPackage('tests/fixtures/datapackage_with_dereferencing.json')
    assert dp.descriptor['resources'] == [
        {'name': 'name1', 'schema': {'fields': [{'name': 'name'}]}},
        {'name': 'name2', 'dialect': {'delimiter': ','}},
    ]

def test_descriptor_dereferencing_uri_pointer(self):
def test_descriptor_dereferencing_uri_pointer(self, NoDefaultsDataPackage):
descriptor = {
'resources': [
{'name': 'name1', 'schema': '#/schemas/main'},
Expand All @@ -264,7 +273,7 @@ def test_descriptor_dereferencing_uri_pointer(self):
'schemas': {'main': {'fields': [{'name': 'name'}]}},
'dialects': [{'delimiter': ','}],
}
dp = datapackage.DataPackage(descriptor)
dp = NoDefaultsDataPackage(descriptor)
assert dp.descriptor['resources'] == [
{'name': 'name1', 'schema': {'fields': [{'name': 'name'}]}},
{'name': 'name2', 'dialect': {'delimiter': ','}},
Expand All @@ -280,7 +289,9 @@ def test_descriptor_dereferencing_uri_pointer_bad(self):
dp = datapackage.DataPackage(descriptor)

@httpretty.activate
def test_descriptor_dereferencing_uri_remote(self):
@pytest.mark.skipif(sys.version_info < (3,3),
reason='Python2 conflict pytest/httpretty')
def test_descriptor_dereferencing_uri_remote(self, NoDefaultsDataPackage):
# Mocks
httpretty.register_uri(httpretty.GET,
'http://example.com/schema', body='{"fields": [{"name": "name"}]}')
Expand All @@ -293,7 +304,7 @@ def test_descriptor_dereferencing_uri_remote(self):
{'name': 'name2', 'dialect': 'https://example.com/dialect'},
],
}
dp = datapackage.DataPackage(descriptor)
dp = NoDefaultsDataPackage(descriptor)
assert dp.descriptor['resources'] == [
{'name': 'name1', 'schema': {'fields': [{'name': 'name'}]}},
{'name': 'name2', 'dialect': {'delimiter': ','}},
Expand All @@ -311,14 +322,14 @@ def test_descriptor_dereferencing_uri_remote_bad(self):
with pytest.raises(datapackage.exceptions.DataPackageException):
dp = datapackage.DataPackage(descriptor)

def test_descriptor_dereferencing_uri_local(self):
def test_descriptor_dereferencing_uri_local(self, NoDefaultsDataPackage):
descriptor = {
'resources': [
{'name': 'name1', 'schema': 'table_schema.json'},
{'name': 'name2', 'dialect': 'csv_dialect.json'},
],
}
dp = datapackage.DataPackage(descriptor, default_base_path='tests/fixtures')
dp = NoDefaultsDataPackage(descriptor, default_base_path='tests/fixtures')
assert dp.descriptor['resources'] == [
{'name': 'name1', 'schema': {'fields': [{'name': 'name'}]}},
{'name': 'name2', 'dialect': {'delimiter': ','}},
Expand All @@ -342,6 +353,79 @@ def test_descriptor_dereferencing_uri_local_bad_not_safe(self):
with pytest.raises(datapackage.exceptions.DataPackageException):
dp = datapackage.DataPackage(descriptor, default_base_path='tests/fixtures')

def test_descriptor_apply_defaults(self):
    """Creating a package from an empty dict adds the default profile to it."""
    source = {}
    package = datapackage.DataPackage(source)
    expected = {'profile': 'data-package'}
    # The assertion targets the dict that was passed in, not package.descriptor.
    assert source == expected

def test_descriptor_apply_defaults_resource(self):
    """A bare resource gains the default profile and encoding."""
    source = {'resources': [{'name': 'name'}]}
    package = datapackage.DataPackage(source)
    expected_resource = {
        'name': 'name',
        'profile': 'data-resource',
        'encoding': 'utf-8',
    }
    # The assertion targets the dict that was passed in, not package.descriptor.
    assert source == {
        'profile': 'data-package',
        'resources': [expected_resource],
    }

def test_descriptor_apply_defaults_resource_tabular_schema(self):
    """A tabular resource's schema gains field and missing-value defaults."""
    resource = {
        'name': 'name',
        'profile': 'tabular-data-resource',
        'schema': {
            'fields': [{'name': 'name'}],
        },
    }
    source = {'resources': [resource]}
    package = datapackage.DataPackage(source)

    expected_schema = {
        'fields': [{'name': 'name', 'type': 'string', 'format': 'default'}],
        'missingValues': [''],
    }
    expected_resource = {
        'name': 'name',
        'profile': 'tabular-data-resource',
        'encoding': 'utf-8',
        'schema': expected_schema,
    }
    # The assertion targets the dict that was passed in, not package.descriptor.
    assert source == {
        'profile': 'data-package',
        'resources': [expected_resource],
    }

def test_descriptor_apply_defaults_resource_tabular_dialect(self):
    """A tabular resource's dialect keeps its own keys and gains the defaults.

    The expected dialect is built from ``config.DEFAULT_DIALECT`` rather
    than a hand-copied literal, so the test checks the merge behaviour
    (explicit keys win, missing keys are filled) without pinning a second
    copy of every default value.
    """
    # Local import: this module's import block is not changed by this test.
    from datapackage import config

    descriptor = {
        'resources': [{
            'name': 'name',
            'profile': 'tabular-data-resource',
            'dialect': {
                'delimiter': 'custom',
            }
        }],
    }
    datapackage.DataPackage(descriptor)

    expected_dialect = dict(config.DEFAULT_DIALECT)
    expected_dialect['delimiter'] = 'custom'
    assert descriptor == {
        'profile': 'data-package',
        'resources': [{
            'name': 'name',
            'profile': 'tabular-data-resource',
            'encoding': 'utf-8',
            'dialect': expected_dialect,
        }],
    }
    # Every default option must be present, and the explicit value preserved.
    dialect = descriptor['resources'][0]['dialect']
    assert set(dialect) == set(config.DEFAULT_DIALECT)
    assert dialect['delimiter'] == 'custom'


class TestDataPackageResources(object):
def test_base_path_defaults_to_none(self):
Expand Down Expand Up @@ -689,7 +773,7 @@ def test_local_data_path(self, datapackage_zip):

def test_it_can_load_from_zip_files_inner_folders(self, tmpfile):
descriptor = {
'name': 'foo',
'profile': 'data-package',
}
with zipfile.ZipFile(tmpfile.name, 'w') as z:
z.writestr('foo/datapackage.json', json.dumps(descriptor))
Expand Down
3 changes: 2 additions & 1 deletion tests/test_pushpull.py
Expand Up @@ -41,7 +41,8 @@ def test_push_datapackage(storage):
]


def test_pull_datapackage(storage, descriptor):
@mock.patch.object(DataPackage, '_apply_defaults')
def test_pull_datapackage(_apply_defaults, storage, descriptor):

# Prepare and call
storage.buckets = ['data___data']
Expand Down

0 comments on commit 10c9287

Please sign in to comment.