Skip to content

Commit

Permalink
[#56] Allow providing a dataset schema to profiles
Browse files Browse the repository at this point in the history
This makes it possible to check whether a field should be stored as a custom
field or as an extra
  • Loading branch information
amercader committed May 8, 2024
1 parent 48b5e61 commit 65abb1f
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 7 deletions.
15 changes: 11 additions & 4 deletions ckanext/dcat/processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

class RDFProcessor(object):

def __init__(self, profiles=None, compatibility_mode=False):
def __init__(self, profiles=None, dataset_schema='dataset', compatibility_mode=False):
'''
Creates a parser or serializer instance
Expand All @@ -56,6 +56,8 @@ def __init__(self, profiles=None, compatibility_mode=False):
raise RDFProfileException(
'No suitable RDF profiles could be loaded')

self.dataset_schema = dataset_schema

if not compatibility_mode:
compatibility_mode = p.toolkit.asbool(
config.get(COMPAT_MODE_CONFIG_OPTION, False))
Expand Down Expand Up @@ -177,11 +179,16 @@ def datasets(self):
for dataset_ref in self._datasets():
dataset_dict = {}
for profile_class in self._profiles:
profile = profile_class(self.g, self.compatibility_mode)
profile = profile_class(
self.g,
dataset_schema=self.dataset_schema,
compatibility_mode=self.compatibility_mode
)
profile.parse_dataset(dataset_dict, dataset_ref)

yield dataset_dict


class RDFSerializer(RDFProcessor):
'''
A CKAN to RDF serializer based on rdflib
Expand Down Expand Up @@ -245,7 +252,7 @@ def graph_from_dataset(self, dataset_dict):
dataset_ref = URIRef(dataset_uri(dataset_dict))

for profile_class in self._profiles:
profile = profile_class(self.g, self.compatibility_mode)
profile = profile_class(self.g, compatibility_mode=self.compatibility_mode)
profile.graph_from_dataset(dataset_dict, dataset_ref)

return dataset_ref
Expand All @@ -263,7 +270,7 @@ def graph_from_catalog(self, catalog_dict=None):
catalog_ref = URIRef(catalog_uri())

for profile_class in self._profiles:
profile = profile_class(self.g, self.compatibility_mode)
profile = profile_class(self.g, compatibility_mode=self.compatibility_mode)
profile.graph_from_catalog(catalog_dict, catalog_ref)

return catalog_ref
Expand Down
62 changes: 59 additions & 3 deletions ckanext/dcat/profiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from dateutil.parser import parse as parse_date

import ckantoolkit as toolkit
from ckantoolkit import config
from ckantoolkit import url_for

Expand All @@ -15,7 +16,6 @@
from geomet import wkt, InvalidGeoJSONException

from ckan.model.license import LicenseRegister
from ckan.plugins import toolkit
from ckan.lib.munge import munge_tag
from ckan.lib.helpers import resource_formats
from ckanext.dcat.utils import resource_uri, publisher_uri_organization_fallback, DCAT_EXPOSE_SUBCATALOGS, DCAT_CLEAN_TAGS
Expand Down Expand Up @@ -55,6 +55,19 @@

DISTRIBUTION_LICENSE_FALLBACK_CONFIG = 'ckanext.dcat.resource.inherit.license'

# Standard CKAN dataset fields. Keys listed here are always written at the
# root level of the dataset dict rather than as extras, whether or not a
# dataset schema defines them.
ROOT_DATASET_FIELDS = [
    'name',
    'title',
    'url',
    'version',
    'tags',
    'license_id',
    'maintainer',
    'maintainer_email',
    'author',
    'author_email',
]


class URIRefOrLiteral(object):
'''Helper which creates an URIRef if the value appears to be an http URL,
Expand Down Expand Up @@ -111,7 +124,9 @@ class RDFProfile(object):
custom profiles
'''

def __init__(self, graph, compatibility_mode=False):
_dataset_schema = None

def __init__(self, graph, dataset_schema='dataset', compatibility_mode=False):
'''Class constructor
Graph is an rdflib.Graph instance.
Expand All @@ -130,6 +145,15 @@ def __init__(self, graph, compatibility_mode=False):
# _license().
self._licenceregister_cache = None

schema_show = toolkit.get_action("scheming_dataset_schema_show")
if schema_show:
try:
schema = schema_show({}, {"type": dataset_schema})
except toolkit.ObjectNotFound:
raise toolkit.ObjectNotFound(f"Unknown dataset schema: {dataset_schema}")

self._dataset_schema = schema

def _datasets(self):
'''
Generator that returns all DCAT datasets on the graph
Expand Down Expand Up @@ -695,6 +719,38 @@ def _add_spatial_to_dict(self, dataset_dict, key, spatial):
{'key': 'spatial_{0}'.format(key) if key != 'geom' else 'spatial',
'value': spatial.get(key)})

def _schema_field(self, key):
'''
Returns the schema field information if the provided key exists as a field in
the dataset schema (if one was provided)
'''
if not self._dataset_schema:
return None

for field in self._dataset_schema['dataset_fields']:
if field['field_name'] == key:
return field

def _set_dataset_value(self, dataset_dict, key, value):
    '''
    Stores a value for the given key in a CKAN dataset dict

    The value is written at the root level of the dict when the key is one
    of the standard CKAN fields listed in ROOT_DATASET_FIELDS, or when the
    dataset schema (if one was provided) defines a field with that name.
    Any other key is appended to the `extras` list as a
    `{'key': ..., 'value': ...}` entry; the list is created if it is
    missing or empty.

    Returns the same (modified) dataset dict.
    '''
    store_at_root = self._schema_field(key) or key in ROOT_DATASET_FIELDS
    if store_at_root:
        dataset_dict[key] = value
        return dataset_dict

    extras = dataset_dict.get('extras')
    if not extras:
        # Missing, None or empty: start a fresh list on the dict
        extras = dataset_dict['extras'] = []
    extras.append({'key': key, 'value': value})

    return dataset_dict

def _get_dataset_value(self, dataset_dict, key, default=None):
'''
Returns the value for the given key on a CKAN dict
Expand Down Expand Up @@ -1021,7 +1077,7 @@ def parse_dataset(self, dataset_dict, dataset_ref):
):
value = self._object_value(dataset_ref, predicate)
if value:
dataset_dict['extras'].append({'key': key, 'value': value})
self._set_dataset_value(dataset_dict, key, value)

# Lists
for key, predicate, in (
Expand Down

0 comments on commit 65abb1f

Please sign in to comment.