Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add improved sector support #22

Merged
merged 8 commits into from
Dec 21, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add `start` and `end` properties to activities
- Error when there’s no data ([#12](https://github.com/andylolz/pyandi/issues/12))
- Error when there are no codelists
- Improved sector support ([#22](https://github.com/andylolz/pyandi/pull/22))

### Changed
- Change internal representation of codelists
Expand Down
5 changes: 3 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -109,16 +109,17 @@ More complicated activity filters
registry = pyandi.data()

dfid = registry.publishers.find(name='dfid')
sector_category = pyandi.sector(311, 2) # Agriculture

ag_acts = dfid.activities.where(
actual_start__lte='2017-12-31',
actual_end__gte='2017-01-01',
sector__startswith='311', # Agriculture
sector__in=sector_category,
)
print('DFID had {:,} agricultural activities running during 2017.'.format(
len(ag_acts)))

# DFID had 176 agricultural activities running during 2017.
# DFID had 180 agricultural activities running during 2017.

TODO
----
Expand Down
1 change: 1 addition & 0 deletions pyandi/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .data.registry import Registry as data # noqa: F401
from .data.sector import Sector as sector # noqa: F401
from .standard.codelist import CodelistSet as codelists # noqa: F401
from .utils import download # noqa: F401
5 changes: 3 additions & 2 deletions pyandi/data/activity.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,18 @@
class ActivitySet(GenericSet):
def __init__(self, datasets, **kwargs):
    """Set up an activity set over ``datasets``.

    Keyword arguments are kept as the set's initial ``where`` clauses.
    """
    # The base initialiser resets the generic attributes, so it runs first.
    super().__init__()

    # Generic set configuration: item class, key, filters and clauses.
    self._instance_class = Activity
    self._key = 'iati_identifier'
    self._wheres = kwargs
    self._filters = [
        'iati_identifier',
        'title',
        'description',
        'location',
        'sector',
        'planned_start',
        'actual_start',
        'planned_end',
        'actual_end',
    ]

    # Activity-specific configuration.
    self._element = 'iati-activity'
    self._filetype = 'activity'
    self.datasets = datasets

def __len__(self):
total = 0
Expand Down
4 changes: 3 additions & 1 deletion pyandi/data/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@
class DatasetSet(GenericSet):
def __init__(self, data_path, metadata_path, **kwargs):
super().__init__()
self._wheres = kwargs
self._key = 'name'
self._filters = ['name', 'filetype']
self._wheres = kwargs
self._instance_class = Dataset

self.data_path = data_path
self.metadata_path = metadata_path

Expand Down
64 changes: 64 additions & 0 deletions pyandi/data/sector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
from ..standard.codelist import CodelistSet, CodelistItem


class Sector:
    """A sector code, optionally qualified by a vocabulary and percentage.

    Attributes set by the constructor:

    * ``code`` -- a ``CodelistItem`` when the code was resolved against
      the ``Sector`` or ``SectorCategory`` codelist, otherwise a string.
    * ``vocabulary`` -- the vocabulary's codelist item, or ``None`` when
      no vocabulary is known.
    * ``percentage`` -- the percentage as a float, or ``None``.
    """

    # Maps codes from the ``Vocabulary`` codelist onto codes of the
    # ``SectorVocabulary`` codelist. ``None`` marks entries with no
    # counterpart; for those the original ``Vocabulary`` item is kept.
    _VOCABULARY_MAP = {
        'ADT': '6', 'COFOG': '3',
        'DAC': '1', 'DAC-3': '2',
        'ISO': None, 'NACE': '4',
        'NTEE': '5', 'RO': '99',
        'RO2': '98', 'WB': None,
    }

    def __init__(self, code, vocabulary=None, percentage=None):
        """Build a sector from a code and an optional vocabulary.

        :param code: sector code (string or number), or a ``CodelistItem``
            taken from the ``Sector`` or ``SectorCategory`` codelist.
        :param vocabulary: vocabulary code; looked up in the
            ``Vocabulary`` codelist first, then in ``SectorVocabulary``.
        :param percentage: optional percentage; converted with ``float``.
        :raises ValueError: if the vocabulary is unknown, if the code is
            not found in the codelist selected by the vocabulary, or if a
            ``CodelistItem`` from an unrelated codelist is passed.
        """
        codelists = CodelistSet()

        self.percentage = None if percentage is None else float(percentage)

        if vocabulary:
            vocab_item = codelists.get('Vocabulary').get(vocabulary)
            if vocab_item is not None:
                new_code = self._VOCABULARY_MAP.get(vocab_item.code)
                if new_code:
                    vocab_item = codelists.get(
                        'SectorVocabulary').get(new_code)
            else:
                vocab_item = codelists.get('SectorVocabulary').get(vocabulary)
            # Checked after both lookup paths, so a failed lookup is always
            # reported as an unknown vocabulary rather than surfacing as an
            # AttributeError on ``None`` below.
            if vocab_item is None:
                raise ValueError('Unknown vocabulary')

            # NOTE(review): the set's ``get`` returns every item when
            # handed a falsy value, so a missing ``code`` is not caught by
            # the ``None`` check below -- confirm whether that is intended.
            if vocab_item.code in ('DAC', '1'):
                self.code = codelists.get('Sector').get(code)
            elif vocab_item.code in ('DAC-3', '2'):
                self.code = codelists.get('SectorCategory').get(code)
            else:
                # Codes from other vocabularies are kept as plain strings.
                self.code = str(code)
            if self.code is None:
                raise ValueError('Code and vocabulary don\'t match')
            self.vocabulary = vocab_item
        elif isinstance(code, CodelistItem):
            # No vocabulary given: infer it from the item's own codelist.
            slug = code.codelist.slug
            if slug == 'Sector':
                vocab_code = '1'
            elif slug == 'SectorCategory':
                vocab_code = '2'
            else:
                raise ValueError('Invalid sector code: {}'.format(code))
            self.vocabulary = codelists.get(
                'SectorVocabulary').get(vocab_code)
            self.code = code
        else:
            self.code = str(code)
            self.vocabulary = None

    def __repr__(self):
        """Return ``<Sector (...)>`` showing the code and the vocabulary."""
        if isinstance(self.code, CodelistItem):
            txt = '{} ({}), Vocabulary: {}'.format(
                self.code.name, self.code.code, self.vocabulary.name)
        elif self.vocabulary:
            txt = '{}, Vocabulary: {}'.format(
                self.code, self.vocabulary.name)
        else:
            txt = '{}, Vocabulary: Unspecified'.format(self.code)
        return '<{} ({})>'.format(self.__class__.__name__, txt)
15 changes: 13 additions & 2 deletions pyandi/standard/activity_schema.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ..utils.exceptions import SchemaError
from ..utils.types import StringType, DateType
from ..utils.types import StringType, DateType, SectorType
from ..utils.abstract import GenericType


Expand All @@ -20,7 +20,11 @@ def location(self):
return GenericType('location')

def sector(self):
    """Return the query type used to read and filter ``sector`` elements.

    The mapping gives, for sector vocabulary '1' and '2', the
    ``@vocabulary`` attribute values that identify it in this schema;
    ``None`` stands for an element with no ``@vocabulary`` attribute.
    """
    # The previous early ``return StringType('sector/@code')`` made this
    # construction unreachable; sectors are now returned as SectorType.
    condition = {
        '1': [None, 'DAC'],
        '2': ['DAC-3'],
    }
    return SectorType('sector', self.version, condition)

def planned_start(self):
return DateType('activity-date[@type="start-planned"]/@iso-date')
Expand Down Expand Up @@ -65,6 +69,13 @@ def title(self):
def description(self):
return StringType('description/narrative/text()')

def sector(self):
    """Return the query type used to read and filter ``sector`` elements.

    The mapping gives, for sector vocabulary '1' and '2', the
    ``@vocabulary`` attribute values that identify it in this schema;
    ``None`` stands for an element with no ``@vocabulary`` attribute.
    """
    vocabulary_values = {'1': [None, '1'], '2': ['2']}
    return SectorType('sector', self.version, vocabulary_values)

def planned_start(self):
return DateType('activity-date[@type="1"]/@iso-date')

Expand Down
17 changes: 13 additions & 4 deletions pyandi/standard/codelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@
class CodelistSet(GenericSet):
def __init__(self, path=None, **kwargs):
super().__init__()
self._wheres = kwargs
self._key = 'name'
self._filters = ['name', 'version']
self._wheres = kwargs
self._instance_class = Codelist

if not path:
path = join('__pyandicache__', 'standard', 'codelists')
self.path = path
Expand Down Expand Up @@ -47,9 +49,11 @@ def download(self):
class Codelist(GenericSet):
def __init__(self, slug, path, version, **kwargs):
super().__init__()
self._wheres = kwargs
self._key = 'code'
self._filters = ['code', 'version']
self._filters = ['code', 'version', 'category']
self._wheres = kwargs
self._instance_class = CodelistItem

self.slug = slug
self.path = join(path, slug + '.json')
self.version = version
Expand All @@ -74,14 +78,19 @@ def metadata(self):

def __iter__(self):
code = self._wheres.get('code')
category = self._wheres.get('category')
version = self._wheres.get('version', self.version)
if version is not None:
version = str(version)
if code is not None:
code = str(code)
if category is not None:
category = str(category)
for data in self.data.values():
if code is not None and data['code'] != code:
continue
if category is not None and data['category'] != category:
continue
if version is not None:
version_from = data.get('from')
version_until = data.get('until')
Expand Down Expand Up @@ -117,7 +126,7 @@ def complete(self):

class CodelistItem:
def __init__(self, codelist, **kwargs):
self._category = kwargs.get('category')
self.category = kwargs.get('category')
self.status = kwargs.get('status', 'active')
self.code = kwargs.get('code')
self.name = kwargs.get('name')
Expand Down
3 changes: 3 additions & 0 deletions pyandi/utils/abstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ def __init__(self):
self._key = None
self._filters = []
self._wheres = {}
self._instance_class = None

def where(self, **kwargs):
for k in kwargs.keys():
Expand Down Expand Up @@ -40,6 +41,8 @@ def all(self):
def get(self, item=None):
    """Look up one member of the set by its key.

    A falsy ``item`` returns ``self.all()``. An ``item`` whose exact type
    is the set's instance class is replaced by its key attribute before
    the lookup; anything else is used as the key value directly.
    """
    if item:
        key_name = self._key
        if type(item) is self._instance_class:
            lookup = getattr(item, key_name)
        else:
            lookup = item
        return self.find(**{key_name: lookup})
    return self.all()

def find(self, **kwargs):
Expand Down
58 changes: 58 additions & 0 deletions pyandi/utils/types.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import logging
from datetime import datetime

from ..data.sector import Sector
from ..standard.codelist import CodelistSet, CodelistItem
from ..utils.abstract import GenericType


Expand Down Expand Up @@ -44,3 +46,59 @@ def exec(self, xml):
except ValueError:
logger.warn('Invalid date: "{}"'.format(date_str))
return dates


class SectorType(GenericType):
    """Query type for ``sector`` elements, yielding ``Sector`` objects.

    ``condition`` maps a sector vocabulary code ('1' for sectors, '2' for
    sector categories) to the list of ``@vocabulary`` attribute values
    that identify that vocabulary. ``None`` in a list stands for an
    element that has no ``@vocabulary`` attribute.
    """

    def __init__(self, expr, version, condition):
        """Store the base expression and the vocabulary conditions.

        ``version`` is accepted as part of the signature but is not used
        by this class.
        """
        super().__init__(expr)
        self.condition = condition

    def _vocab_condition(self, conditions):
        """Build an XPath test matching any of the given vocabularies.

        Tests are joined with ``or``; the result is wrapped in
        parentheses only when there is more than one test.
        """
        tests = [
            'not(@vocabulary)' if vocab is None
            else '@vocabulary = "{}"'.format(vocab)
            for vocab in conditions
        ]
        joined = ' or '.join(tests)
        if len(tests) > 1:
            joined = '({})'.format(joined)
        return joined

    def where(self, op, value):
        """Return an XPath expression selecting sectors matching ``value``.

        * ``in`` -- ``value`` must be a sector category (a ``Sector``
          whose vocabulary code is '2'); matches any sector code in that
          category under the sector vocabulary.
        * ``eq`` -- ``value`` must be a ``Sector``; matches its code and,
          when it has one, its vocabulary.

        Any other operator is delegated to the parent class.

        :raises ValueError: for ``in`` when ``value`` is not a sector
            category.
        :raises TypeError: for ``eq`` when ``value`` is not a ``Sector``.
        """
        if op == 'in':
            # A Sector without a vocabulary is not a category either; test
            # for it explicitly so it gets the intended error.
            if (not isinstance(value, Sector)
                    or value.vocabulary is None
                    or value.vocabulary.code != '2'):
                raise ValueError(
                    '{} is not a sector category'.format(value))
            codelist_items = CodelistSet().get('Sector').where(
                category=value.code.code).all()
            code_tests = ' or '.join(
                '@code = "{code}"'.format(code=c.code)
                for c in codelist_items)
            conditions = [
                # A category with no sectors would otherwise produce the
                # invalid XPath "()"; ``false()`` matches nothing.
                '({})'.format(code_tests or 'false()'),
                self._vocab_condition(self.condition.get('1')),
            ]
            return '{expr}[{conditions}]'.format(
                expr=self.get(),
                conditions=' and '.join(conditions),
            )

        if op == 'eq':
            if not isinstance(value, Sector):
                raise TypeError('{} is not a sector'.format(value))
            if isinstance(value.code, CodelistItem):
                code = value.code.code
            else:
                code = value.code
            conditions = ['@code = "{code}"'.format(code=code)]
            if value.vocabulary is not None:
                vocab_code = value.vocabulary.code
                # Vocabularies with no entry in ``condition`` are matched
                # on their own code rather than failing on ``None``.
                conds = self.condition.get(vocab_code, [vocab_code])
                conditions.append(self._vocab_condition(conds))
            return '{expr}[{conditions}]'.format(
                expr=self.get(),
                conditions=' and '.join(conditions),
            )

        return super().where(op, value)

    def exec(self, xml):
        """Return a ``Sector`` for every element the expression selects."""
        return [Sector(x.get('code'),
                       vocabulary=x.get('vocabulary'),
                       percentage=x.get('percentage'))
                for x in xml.xpath(self.get())]