ckan · amercader · Jul 5, 2024 · May 8, 2024 · May 8, 2024 · May 8, 2024
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -54,11 +54,13 @@ jobs:
         pip install -e .
         # Replace default path to CKAN core config file with the one on the container
         sed -i -e 's/use = config:.*/use = config:\/srv\/app\/src\/ckan\/test-core.ini/' test.ini
-    - name: Setup harvest extension
+    - name: Setup other extension
       run: |
         git clone https://github.com/ckan/ckanext-harvest
         pip install -e ckanext-harvest
-        pip install -r ckanext-harvest/pip-requirements.txt
+        pip install -r ckanext-harvest/requirements.txt
+        git clone https://github.com/ckan/ckanext-scheming
+        pip install -e ckanext-scheming
     - name: Setup extension
       run: |
         ckan -c test.ini db init

diff --git a/README.md b/README.md
diff --git a/ckanext/dcat/plugins/__init__.py b/ckanext/dcat/plugins/__init__.py
@@ -2,6 +2,7 @@
 
 from builtins import object
 import os
+import json
 
 from ckantoolkit import config
 
@@ -19,6 +20,7 @@
                                 dcat_auth,
                                 )
 from ckanext.dcat import utils
+from ckanext.dcat.validators import dcat_validators
 
 
 CUSTOM_ENDPOINT_CONFIG = 'ckanext.dcat.catalog_endpoint'
@@ -28,6 +30,19 @@
 I18N_DIR = os.path.join(HERE, u"../i18n")
 
 
+def _get_dataset_schema(dataset_type="dataset"):
+    """Return the scheming schema for `dataset_type`, or None if unavailable."""
+    try:
+        show_schema = p.toolkit.get_action("scheming_dataset_schema_show")
+    except KeyError:
+        return None
+    try:
+        return show_schema({}, {"type": dataset_type})
+    except (KeyError, p.toolkit.ObjectNotFound):
+        # Either lookup failure is treated as "no schema for this type"
+        return None
+
+
 class DCATPlugin(p.SingletonPlugin, DefaultTranslation):
 
     p.implements(p.IConfigurer, inherit=True)
@@ -38,6 +53,7 @@ class DCATPlugin(p.SingletonPlugin, DefaultTranslation):
     p.implements(p.ITranslation, inherit=True)
     p.implements(p.IClick)
     p.implements(p.IBlueprint)
+    p.implements(p.IValidators)
 
     # IClick
 
@@ -101,17 +117,31 @@ def get_auth_functions(self):
             'dcat_catalog_search': dcat_auth,
         }
 
+    # IValidators
+    def get_validators(self):
+        return dcat_validators  # imported from ckanext.dcat.validators
+
     # IPackageController
 
     # CKAN < 2.10 hooks
     def after_show(self, context, data_dict):
         return self.after_dataset_show(context, data_dict)
 
+    def before_index(self, dataset_dict):
+        return self.before_dataset_index(dataset_dict)  # delegate to the CKAN >= 2.10 hook
+
     # CKAN >= 2.10 hooks
     def after_dataset_show(self, context, data_dict):
 
+        schema = _get_dataset_schema(data_dict["type"])
         # check if config is enabled to translate keys (default: True)
-        if not p.toolkit.asbool(config.get(TRANSLATE_KEYS_CONFIG, True)):
+        # skip if scheming is enabled, as this will be handled there
+        translate_keys = (
+            p.toolkit.asbool(config.get(TRANSLATE_KEYS_CONFIG, True))
+            and not schema
+        )
+
+        if not translate_keys:
             return data_dict
 
         if context.get('for_view'):
@@ -132,6 +162,52 @@ def set_titles(object_dict):
 
         return data_dict
 
+    def before_dataset_index(self, dataset_dict):
+        """Flatten scheming repeating subfields so they can be indexed.
+
+        Non-dict subfield values are stored as `<field>__<subfield>`, the nested
+        field is removed, and the first `spatial_coverage` geometry is exposed.
+        """
+        schema = _get_dataset_schema(dataset_dict["type"])
+        spatial = None
+        for field in (schema['dataset_fields'] if schema else []):
+            name = field['field_name']
+            if name not in dataset_dict or 'repeating_subfields' not in field:
+                continue
+            subfields = dataset_dict.pop(name)
+            for item in subfields:
+                for key, value in item.items():
+                    new_key = f'{name}__{key}'
+                    if isinstance(value, dict):
+                        continue
+                    if not dataset_dict.get(new_key):
+                        dataset_dict[new_key] = value
+                    else:
+                        # Format rather than `+=` so non-string values don't raise
+                        dataset_dict[new_key] = f'{dataset_dict[new_key]} {value}'
+            if name == 'spatial_coverage':
+                spatial = subfields
+
+        def _check_for_a_geom(spatial_dict):
+            # Return the first non-empty geometry, serializing dicts to JSON
+            candidates = map(spatial_dict.get, ('geom', 'bbox', 'centroid'))
+            for value in filter(None, candidates):
+                if not isinstance(value, dict):
+                    return value
+                try:
+                    return json.dumps(value)
+                except (TypeError, ValueError):
+                    pass  # not serializable, try the next field
+            return None
+
+        # Store the first geometry found so ckanext-spatial can pick it up for indexing
+        if spatial and not dataset_dict.get('spatial'):
+            value = next(filter(None, map(_check_for_a_geom, spatial)), None)
+            if value:
+                dataset_dict['spatial'] = dataset_dict['extras_spatial'] = value
+
+        return dataset_dict
+
 
 class DCATJSONInterface(p.SingletonPlugin):
     p.implements(p.IActions)

diff --git a/ckanext/dcat/processors.py b/ckanext/dcat/processors.py
@@ -33,12 +33,15 @@
 
 class RDFProcessor(object):
 
-    def __init__(self, profiles=None, compatibility_mode=False):
+    def __init__(self, profiles=None, dataset_type='dataset', compatibility_mode=False):
         '''
         Creates a parser or serializer instance
 
         You can optionally pass a list of profiles to be used.
 
+        A scheming dataset type can be provided, in which case the scheming schema
+        will be loaded by the base profile so it can be used by other profiles.
+
         In compatibility mode, some fields are modified to maintain
         compatibility with previous versions of the ckanext-dcat parsers
         (eg adding the `dcat_` prefix or storing comma separated lists instead
@@ -56,6 +59,8 @@ def __init__(self, profiles=None, compatibility_mode=False):
             raise RDFProfileException(
                 'No suitable RDF profiles could be loaded')
 
+        self.dataset_type = dataset_type
+
         if not compatibility_mode:
             compatibility_mode = p.toolkit.asbool(
                 config.get(COMPAT_MODE_CONFIG_OPTION, False))
@@ -177,11 +182,16 @@ def datasets(self):
         for dataset_ref in self._datasets():
             dataset_dict = {}
             for profile_class in self._profiles:
-                profile = profile_class(self.g, self.compatibility_mode)
+                profile = profile_class(
+                    self.g,
+                    dataset_type=self.dataset_type,
+                    compatibility_mode=self.compatibility_mode
+                )
                 profile.parse_dataset(dataset_dict, dataset_ref)
 
             yield dataset_dict
 
+
 class RDFSerializer(RDFProcessor):
     '''
     A CKAN to RDF serializer based on rdflib
@@ -245,7 +255,7 @@ def graph_from_dataset(self, dataset_dict):
         dataset_ref = URIRef(dataset_uri(dataset_dict))
 
         for profile_class in self._profiles:
-            profile = profile_class(self.g, self.compatibility_mode)
+            profile = profile_class(self.g, compatibility_mode=self.compatibility_mode)
             profile.graph_from_dataset(dataset_dict, dataset_ref)
 
         return dataset_ref
@@ -263,7 +273,7 @@ def graph_from_catalog(self, catalog_dict=None):
         catalog_ref = URIRef(catalog_uri())
 
         for profile_class in self._profiles:
-            profile = profile_class(self.g, self.compatibility_mode)
+            profile = profile_class(self.g, compatibility_mode=self.compatibility_mode)
             profile.graph_from_catalog(catalog_dict, catalog_ref)
 
         return catalog_ref

diff --git a/ckanext/dcat/profiles/__init__.py b/ckanext/dcat/profiles/__init__.py
@@ -20,4 +20,5 @@
 
 from .euro_dcat_ap import EuropeanDCATAPProfile
 from .euro_dcat_ap_2 import EuropeanDCATAP2Profile
+from .euro_dcat_ap_scheming import EuropeanDCATAPSchemingProfile
 from .schemaorg import SchemaOrgProfile