Skip to content
This repository has been archived by the owner on Jul 11, 2023. It is now read-only.

Commit

Permalink
Updated to datapackage@1.0
Browse files: browse the repository at this point in the history
  • Loading branch information
roll committed Oct 11, 2017
1 parent c0b060a commit 051ef96
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 29 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Expand Up @@ -71,3 +71,6 @@ venv3/
tmp
.projectile
.~lock*

# Extra
datapackage
2 changes: 1 addition & 1 deletion data/datapackage.json
Expand Up @@ -7,7 +7,7 @@
"fields": [
{
"name": "id",
"type": "integer"
"type": "number"
},
{
"name": "name",
Expand Down
3 changes: 1 addition & 2 deletions setup.py
Expand Up @@ -41,8 +41,7 @@ def read(*paths):
'openpyxl>=2.4,<3.0',
]
INSTALL_FORMAT_DATAPACKAGE_REQUIRES = [
# TODO: update after specs-v1
'datapackage<1.0',
'datapackage>=1.1.3,<2.0',
]
INSTALL_FORMAT_ODS_REQUIRES = [
'ezodf>=0.3,<2.0',
Expand Down
45 changes: 20 additions & 25 deletions tabulator/parsers/datapackage.py
Expand Up @@ -4,18 +4,16 @@
from __future__ import absolute_import
from __future__ import unicode_literals

import datapackage
import six

import datapackage
from ..parser import Parser
from .. import exceptions


# Module API

class DataPackageParser(Parser):
"""Parser to extract data from Tabular Data Packages.
See: http://specs.frictionlessdata.io/
"""

# Public
Expand All @@ -26,39 +24,37 @@ class DataPackageParser(Parser):

def __init__(self, loader, force_parse=False, resource=0):
self.__force_parse = force_parse
self.__resource = resource
self.__resource_pointer = resource
self.__extended_rows = None
self.__datapackage = None
self.__resource_iter = None
self.__encoding = None
self.__resource = None

@property
def closed(self):
return self.__resource_iter is None
return self.__extended_rows is None

def open(self, source, encoding=None):
self.close()
self.__datapackage = datapackage.DataPackage(source)
package = datapackage.DataPackage(source)
if isinstance(self.__resource_pointer, six.string_types):
self.__resource = package.get_resource(self.__resource_pointer)
else:
try:
self.__resource = package.resources[self.__resource_pointer]
except (TypeError, IndexError):
pass
if not self.__resource:
message = 'Data package "%s" doesn\'t have resource "%s"'
raise exceptions.SourceError(message % (source, self.__resource_pointer))
self.__resource.infer()
self.__encoding = self.__resource.descriptor.get('encoding')
self.reset()

def close(self):
if not self.closed:
self.__datapackage = None
self.__resource_iter = None
self.__extended_rows = None

def reset(self):
if isinstance(self.__resource, six.string_types):
named_resource = next(iter(filter(
lambda res: res.descriptor['name'] == self.__resource,
self.__datapackage.resources
))) # TODO: use data_package.getResource(name) when v1 is released
self.__resource_iter = named_resource.iter(keyed=True)
self.__encoding = named_resource.descriptor.get('encoding')
else:
indexed_resource = self.__datapackage.resources[self.__resource]
self.__resource_iter = indexed_resource.iter(keyed=True)
self.__encoding = indexed_resource.descriptor.get('encoding')
self.__extended_rows = self.__iter_extended_rows()

@property
Expand All @@ -72,6 +68,5 @@ def extended_rows(self):
# Private

def __iter_extended_rows(self):
for number, row in enumerate(self.__resource_iter, start=1):
keys, values = zip(*sorted(row.items()))
yield number, list(keys), list(values)
for row_number, headers, row in self.__resource.iter(extended=True):
yield (row_number - 1, headers, row)
2 changes: 1 addition & 1 deletion tests/formats/test_datapackage.py
Expand Up @@ -6,8 +6,8 @@

import os
import json
from mock import Mock
import pytest
from mock import Mock
from tabulator import Stream
from tabulator.parsers.datapackage import DataPackageParser

Expand Down

0 comments on commit 051ef96

Please sign in to comment.