Skip to content

Commit

Permalink
Merge branch '56-add-schema-file-dcat-ap-2.1'
Browse files Browse the repository at this point in the history
  • Loading branch information
amercader committed Jul 5, 2024
2 parents 568952a + ae78f0f commit 53cedb9
Show file tree
Hide file tree
Showing 29 changed files with 3,017 additions and 224 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,13 @@ jobs:
pip install -e .
# Replace default path to CKAN core config file with the one on the container
sed -i -e 's/use = config:.*/use = config:\/srv\/app\/src\/ckan\/test-core.ini/' test.ini
- name: Setup harvest extension
- name: Setup other extension
run: |
git clone https://github.com/ckan/ckanext-harvest
pip install -e ckanext-harvest
pip install -r ckanext-harvest/pip-requirements.txt
pip install -r ckanext-harvest/requirements.txt
git clone https://github.com/ckan/ckanext-scheming
pip install -e ckanext-scheming
- name: Setup extension
run: |
ckan -c test.ini db init
Expand Down
21 changes: 21 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,27 @@

## [Unreleased](https://github.com/ckan/ckanext-dcat/compare/v1.7.0...HEAD)

* Support for standard CKAN [ckanext-scheming](https://github.com/ckan/ckanext-scheming) schemas.
The DCAT profiles now seamlessly integrate with fields defined via the YAML or JSON scheming files.
Sites willing to migrate to a scheming based metadata schema can do
so by adding the `euro_dcat_ap_scheming` profile at the end of their profile chain (e.g.
`ckanext.dcat.rdf.profiles = euro_dcat_ap_2 euro_dcat_ap_scheming`), which will modify the existing profile
outputs to the format expected by the scheming validators. Sample schemas are provided
in the `ckanext/dcat/schemas` folder. See the [documentation](https://github.com/ckan/ckanext-dcat?tab=readme-ov-file#schemas)
for all details. Some highlights of the new scheming based profiles:

* Actual list support in the API output for list properties like `dct:language`
* Multiple objects now allowed for properties like `dcat:ContactPoint`, `dct:spatial` or `dct:temporal`
* Custom validators for date values that allow `xsd:gYear`, `xsd:gYearMonth`, `xsd:date` and `xsd:dateTime`

(#281)
* New `ckan dcat consume` and `ckan dcat produce` CLI commands (#279)
* Parse dcat:spatialResolutionInMeters as float (#285)
* Split profile classes into their own separate files (#282)
* Catch Not Authorized in View (#280)
* CKAN 2.11 support and requirements updates (#270)


## [v1.7.0](https://github.com/ckan/ckanext-dcat/compare/v1.6.0...v1.7.0) - 2024-04-04

* Adds support for the latest Hydra vocabulary. For backward compatibility, the old properties are still supported but marked as deprecated. (#267)
Expand Down
264 changes: 221 additions & 43 deletions README.md

Large diffs are not rendered by default.

78 changes: 77 additions & 1 deletion ckanext/dcat/plugins/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from builtins import object
import os
import json

from ckantoolkit import config

Expand All @@ -19,6 +20,7 @@
dcat_auth,
)
from ckanext.dcat import utils
from ckanext.dcat.validators import dcat_validators


CUSTOM_ENDPOINT_CONFIG = 'ckanext.dcat.catalog_endpoint'
Expand All @@ -28,6 +30,19 @@
I18N_DIR = os.path.join(HERE, u"../i18n")


def _get_dataset_schema(dataset_type="dataset"):
    """
    Return the scheming schema for ``dataset_type``, or ``None``.

    The schema is obtained by calling the ``scheming_dataset_schema_show``
    action with ``{"type": dataset_type}``.

    ``None`` is returned when the action call raises ``ObjectNotFound``, or
    when looking up or calling the action raises ``KeyError`` (presumably
    because ckanext-scheming is not enabled and the action is therefore not
    registered -- TODO confirm).
    """
    try:
        show_schema = p.toolkit.get_action("scheming_dataset_schema_show")
        try:
            return show_schema({}, {"type": dataset_type})
        except p.toolkit.ObjectNotFound:
            # The action did not find a schema for this dataset type
            return None
    except KeyError:
        return None


class DCATPlugin(p.SingletonPlugin, DefaultTranslation):

p.implements(p.IConfigurer, inherit=True)
Expand All @@ -38,6 +53,7 @@ class DCATPlugin(p.SingletonPlugin, DefaultTranslation):
p.implements(p.ITranslation, inherit=True)
p.implements(p.IClick)
p.implements(p.IBlueprint)
p.implements(p.IValidators)

# IClick

Expand Down Expand Up @@ -101,17 +117,31 @@ def get_auth_functions(self):
'dcat_catalog_search': dcat_auth,
}

# IValidators
def get_validators(self):
    """Return the validators exposed by this plugin (``dcat_validators``)."""
    return dcat_validators

# IPackageController

# CKAN < 2.10 hooks
def after_show(self, context, data_dict):
    """Hook name used by CKAN < 2.10; delegates to ``after_dataset_show``."""
    shown = self.after_dataset_show(context, data_dict)
    return shown

def before_index(self, dataset_dict):
    """Hook name used by CKAN < 2.10; delegates to ``before_dataset_index``."""
    indexed = self.before_dataset_index(dataset_dict)
    return indexed

# CKAN >= 2.10 hooks
def after_dataset_show(self, context, data_dict):

schema = _get_dataset_schema(data_dict["type"])
# check if config is enabled to translate keys (default: True)
if not p.toolkit.asbool(config.get(TRANSLATE_KEYS_CONFIG, True)):
# skip if scheming is enabled, as this will be handled there
translate_keys = (
p.toolkit.asbool(config.get(TRANSLATE_KEYS_CONFIG, True))
and not schema
)

if not translate_keys:
return data_dict

if context.get('for_view'):
Expand All @@ -132,6 +162,52 @@ def set_titles(object_dict):

return data_dict

def before_dataset_index(self, dataset_dict):
    """
    Prepare a dataset that uses scheming repeating subfields for indexing.

    If a scheming schema is found for the dataset type, every field that
    declares ``repeating_subfields`` and is present in ``dataset_dict`` is:

    * flattened: each non-dict subfield value is stored under
      ``extras_<field_name>__<subfield>``, with the values of multiple
      items joined by a single space;
    * removed from ``dataset_dict``.

    Additionally, if the dataset has no ``spatial`` value, the first
    geometry found in the ``spatial_coverage`` items is stored in the
    ``spatial`` and ``extras_spatial`` keys.

    :param dataset_dict: the dataset dict about to be indexed, modified
        in place. It must contain a ``type`` key.
    :returns: the same ``dataset_dict``
    """

    def _as_text(value):
        # Values of several items are concatenated into a single string,
        # so anything that is not already a string has to be converted.
        if isinstance(value, str):
            return value
        if isinstance(value, (list, tuple)):
            return ' '.join(str(member) for member in value)
        return str(value)

    def _check_for_a_geom(spatial_dict):
        # Return the first usable geometry, honouring the order
        # geom > bbox > centroid. Returning as soon as one is found
        # prevents a later, lower priority field from overriding it.
        for field in ('geom', 'bbox', 'centroid'):
            value = spatial_dict.get(field)
            if not value:
                continue
            if isinstance(value, dict):
                try:
                    return json.dumps(value)
                except (TypeError, ValueError):
                    # Not serializable, try the next candidate field
                    continue
            return value
        return None

    schema = _get_dataset_schema(dataset_dict["type"])
    spatial = None
    if schema:
        for field in schema['dataset_fields']:
            field_name = field['field_name']
            if field_name not in dataset_dict or 'repeating_subfields' not in field:
                continue

            for item in dataset_dict[field_name]:
                for key, value in item.items():
                    if isinstance(value, dict):
                        continue
                    # Index a flattened version
                    new_key = f'extras_{field_name}__{key}'
                    current = dataset_dict.get(new_key)
                    if not current:
                        dataset_dict[new_key] = value
                    elif value is not None:
                        # Concatenating None or non-string values
                        # directly would raise a TypeError
                        dataset_dict[new_key] = (
                            _as_text(current) + ' ' + _as_text(value)
                        )

            subfields = dataset_dict.pop(field_name, None)
            if field_name == 'spatial_coverage':
                spatial = subfields

    # Store the first geometry found so ckanext-spatial can pick it up for indexing
    if spatial and not dataset_dict.get('spatial'):
        for item in spatial:
            value = _check_for_a_geom(item)
            if value:
                dataset_dict['spatial'] = value
                dataset_dict['extras_spatial'] = value
                break

    return dataset_dict


class DCATJSONInterface(p.SingletonPlugin):
p.implements(p.IActions)
Expand Down
18 changes: 14 additions & 4 deletions ckanext/dcat/processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,15 @@

class RDFProcessor(object):

def __init__(self, profiles=None, compatibility_mode=False):
def __init__(self, profiles=None, dataset_type='dataset', compatibility_mode=False):
'''
Creates a parser or serializer instance
You can optionally pass a list of profiles to be used.
A scheming dataset type can be provided, in which case the scheming schema
will be loaded by the base profile so it can be used by other profiles.
In compatibility mode, some fields are modified to maintain
compatibility with previous versions of the ckanext-dcat parsers
(eg adding the `dcat_` prefix or storing comma separated lists instead
Expand All @@ -56,6 +59,8 @@ def __init__(self, profiles=None, compatibility_mode=False):
raise RDFProfileException(
'No suitable RDF profiles could be loaded')

self.dataset_type = dataset_type

if not compatibility_mode:
compatibility_mode = p.toolkit.asbool(
config.get(COMPAT_MODE_CONFIG_OPTION, False))
Expand Down Expand Up @@ -177,11 +182,16 @@ def datasets(self):
for dataset_ref in self._datasets():
dataset_dict = {}
for profile_class in self._profiles:
profile = profile_class(self.g, self.compatibility_mode)
profile = profile_class(
self.g,
dataset_type=self.dataset_type,
compatibility_mode=self.compatibility_mode
)
profile.parse_dataset(dataset_dict, dataset_ref)

yield dataset_dict


class RDFSerializer(RDFProcessor):
'''
A CKAN to RDF serializer based on rdflib
Expand Down Expand Up @@ -245,7 +255,7 @@ def graph_from_dataset(self, dataset_dict):
dataset_ref = URIRef(dataset_uri(dataset_dict))

for profile_class in self._profiles:
profile = profile_class(self.g, self.compatibility_mode)
profile = profile_class(self.g, compatibility_mode=self.compatibility_mode)
profile.graph_from_dataset(dataset_dict, dataset_ref)

return dataset_ref
Expand All @@ -263,7 +273,7 @@ def graph_from_catalog(self, catalog_dict=None):
catalog_ref = URIRef(catalog_uri())

for profile_class in self._profiles:
profile = profile_class(self.g, self.compatibility_mode)
profile = profile_class(self.g, compatibility_mode=self.compatibility_mode)
profile.graph_from_catalog(catalog_dict, catalog_ref)

return catalog_ref
Expand Down
1 change: 1 addition & 0 deletions ckanext/dcat/profiles/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,5 @@

from .euro_dcat_ap import EuropeanDCATAPProfile
from .euro_dcat_ap_2 import EuropeanDCATAP2Profile
from .euro_dcat_ap_scheming import EuropeanDCATAPSchemingProfile
from .schemaorg import SchemaOrgProfile
Loading

0 comments on commit 53cedb9

Please sign in to comment.