From 978d2c00b07e8d981a40e5bcb7b40776fd39e807 Mon Sep 17 00:00:00 2001 From: roll Date: Wed, 11 Oct 2017 12:04:27 +0300 Subject: [PATCH] Updated to datapackage@1.0 --- .gitignore | 3 +++ data/datapackage.json | 2 +- setup.py | 3 +-- tabulator/parsers/datapackage.py | 45 ++++++++++++++----------------- tests/formats/test_datapackage.py | 2 +- 5 files changed, 26 insertions(+), 29 deletions(-) diff --git a/.gitignore b/.gitignore index b6408fe9..6c45bcaa 100644 --- a/.gitignore +++ b/.gitignore @@ -71,3 +71,6 @@ venv3/ tmp .projectile .~lock* + +# Extra +datapackage diff --git a/data/datapackage.json b/data/datapackage.json index a66521cc..0fdbc7f4 100644 --- a/data/datapackage.json +++ b/data/datapackage.json @@ -7,7 +7,7 @@ "fields": [ { "name": "id", - "type": "integer" + "type": "number" }, { "name": "name", diff --git a/setup.py b/setup.py index a7f4dbbf..184cb1ad 100644 --- a/setup.py +++ b/setup.py @@ -41,8 +41,7 @@ def read(*paths): 'openpyxl>=2.4,<3.0', ] INSTALL_FORMAT_DATAPACKAGE_REQUIRES = [ - # TODO: update after specs-v1 - 'datapackage<1.0', + 'datapackage>=1.1.3,<2.0', ] INSTALL_FORMAT_ODS_REQUIRES = [ 'ezodf>=0.3,<2.0', diff --git a/tabulator/parsers/datapackage.py b/tabulator/parsers/datapackage.py index 1075ce91..bb7b2462 100644 --- a/tabulator/parsers/datapackage.py +++ b/tabulator/parsers/datapackage.py @@ -4,18 +4,16 @@ from __future__ import absolute_import from __future__ import unicode_literals -import datapackage import six - +import datapackage from ..parser import Parser +from .. import exceptions # Module API class DataPackageParser(Parser): """Parser to extract data from Tabular Data Packages. - - See: http://specs.frictionlessdata.io/ """ # Public @@ -26,39 +24,37 @@ class DataPackageParser(Parser): def __init__(self, loader, force_parse=False, resource=0): self.__force_parse = force_parse - self.__resource = resource + self.__resource_pointer = resource self.__extended_rows = None - self.__datapackage = None - self.__resource_iter = None self.__encoding = None + self.__resource = None @property def closed(self): - return self.__resource_iter is None + return self.__extended_rows is None def open(self, source, encoding=None): self.close() - self.__datapackage = datapackage.DataPackage(source) + package = datapackage.DataPackage(source) + if isinstance(self.__resource_pointer, six.string_types): + self.__resource = package.get_resource(self.__resource_pointer) + else: + try: + self.__resource = package.resources[self.__resource_pointer] + except (TypeError, IndexError): + pass + if not self.__resource: + message = 'Data package "%s" doesn\'t have resource "%s"' + raise exceptions.SourceError(message % (source, self.__resource_pointer)) + self.__resource.infer() + self.__encoding = self.__resource.descriptor.get('encoding') self.reset() def close(self): if not self.closed: - self.__datapackage = None - self.__resource_iter = None self.__extended_rows = None def reset(self): - if isinstance(self.__resource, six.string_types): - named_resource = next(iter(filter( - lambda res: res.descriptor['name'] == self.__resource, - self.__datapackage.resources - ))) # TODO: use data_package.getResource(name) when v1 is released - self.__resource_iter = named_resource.iter(keyed=True) - self.__encoding = named_resource.descriptor.get('encoding') - else: - indexed_resource = self.__datapackage.resources[self.__resource] - self.__resource_iter = indexed_resource.iter(keyed=True) - self.__encoding = indexed_resource.descriptor.get('encoding') self.__extended_rows = self.__iter_extended_rows() @property @@ -72,6 +68,5 @@ def extended_rows(self): # Private def __iter_extended_rows(self): - for number, row in enumerate(self.__resource_iter, start=1): - keys, values = zip(*sorted(row.items())) - yield number, list(keys), list(values) + for row_number, headers, row in self.__resource.iter(extended=True): + yield (row_number - 1, headers, row) diff --git a/tests/formats/test_datapackage.py b/tests/formats/test_datapackage.py index ae1a5368..a93e4058 100644 --- a/tests/formats/test_datapackage.py +++ b/tests/formats/test_datapackage.py @@ -6,8 +6,8 @@ import os import json -from mock import Mock import pytest +from mock import Mock from tabulator import Stream from tabulator.parsers.datapackage import DataPackageParser