Skip to content

Commit

Permalink
[#56] Add support for spatial_coverage
Browse files Browse the repository at this point in the history
New repeating subfield, supporting all properties for the location
class: uri, text, geom, bbox and centroid.
Used `spatial_coverage` as the field name so that it does not interfere with
the `spatial` field expected by ckanext-scheming. In a future commit we will
extract the relevant value to index it as a geometry.
  • Loading branch information
amercader committed May 30, 2024
1 parent cd1d3f0 commit 103aa08
Show file tree
Hide file tree
Showing 5 changed files with 114 additions and 9 deletions.
10 changes: 6 additions & 4 deletions ckanext/dcat/plugins/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,14 +147,16 @@ def before_dataset_index(self, dataset_dict):
pass

if schema:
# TODO: https://github.com/ckan/ckanext-dcat/pull/281#discussion_r1610549936
for field in schema['dataset_fields']:
if field['field_name'] in dataset_dict and 'repeating_subfields' in field:
for index, item in enumerate(dataset_dict[field['field_name']]):
for key in item:
# Index a flattened version
new_key = f'{field["field_name"]}_{index}_{key}'

dataset_dict[new_key] = dataset_dict[field['field_name']][index][key]
value = dataset_dict[field['field_name']][index][key]
if not isinstance(value, dict):
# Index a flattened version
new_key = f'{field["field_name"]}_{index}_{key}'
dataset_dict[new_key] = value
dataset_dict.pop(field['field_name'], None)

return dataset_dict
Expand Down
6 changes: 4 additions & 2 deletions ckanext/dcat/profiles/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -702,17 +702,19 @@ def _add_spatial_value_to_graph(self, spatial_ref, predicate, value):
self.g.add((spatial_ref, predicate, Literal(value, datatype=GEOJSON_IMT)))
# WKT, because GeoDCAT-AP says so
try:
if isinstance(value, str):
value = json.loads(value)
self.g.add(
(
spatial_ref,
predicate,
Literal(
wkt.dumps(json.loads(value), decimals=4),
wkt.dumps(value, decimals=4),
datatype=GSP.wktLiteral,
),
)
)
except (TypeError, ValueError, InvalidGeoJSONException):
except (TypeError, ValueError, InvalidGeoJSONException) as e:
pass

def _add_spatial_to_dict(self, dataset_dict, key, spatial):
Expand Down
27 changes: 25 additions & 2 deletions ckanext/dcat/profiles/euro_dcat_ap_scheming.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json

from rdflib import URIRef, BNode
from rdflib import URIRef, BNode, Literal
from .base import RDFProfile, CleanedURIRef, URIRefOrLiteral
from .base import (
RDF,
Expand Down Expand Up @@ -74,7 +74,7 @@ def _parse_list_value(data_dict, field_name):
field_name = schema_field["field_name"]
new_extras = []
new_dict = {}
check_name = new_fields_mappings.get(field_name, field_name)
check_name = new_fields_mapping.get(field_name, field_name)
for extra in dataset_dict.get("extras", []):
if extra["key"].startswith(f"{check_name}_"):
subfield = extra["key"][extra["key"].index("_") + 1 :]
Expand Down Expand Up @@ -173,6 +173,29 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
self._add_date_triple(temporal_ref, SCHEMA.endDate, item["end"])
self.g.add((dataset_ref, DCT.temporal, temporal_ref))

spatial = dataset_dict.get("spatial_coverage")
if isinstance(spatial, list) and len(spatial):
for item in spatial:
if item.get("uri"):
spatial_ref = CleanedURIRef(item["uri"])
else:
spatial_ref = BNode()
self.g.add((spatial_ref, RDF.type, DCT.Location))
self.g.add((dataset_ref, DCT.spatial, spatial_ref))

if item.get("text"):
self.g.add((spatial_ref, SKOS.prefLabel, Literal(item["text"])))

for field in [
("geom", LOCN.geometry),
("bbox", DCAT.bbox),
("centroid", DCAT.centroid),
]:
if item.get(field[0]):
self._add_spatial_value_to_graph(
spatial_ref, field[1], item[field[0]]
)

resources = dataset_dict.get("resources", [])
for resource in resources:
if resource.get("access_services"):
Expand Down
19 changes: 19 additions & 0 deletions ckanext/dcat/schemas/dcat_ap_2.1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,25 @@ dataset_fields:
label: End
# TODO: dcat_date preset

- field_name: spatial_coverage
label: Spatial coverage
repeating_subfields:

- field_name: uri
label: URI

- field_name: text
label: Label

- field_name: geom
label: Geometry

- field_name: bbox
label: Bounding Box

- field_name: centroid
label: Centroid

- field_name: access_rights
label: Access rights
validators: ignore_missing unicode_safe
Expand Down
61 changes: 60 additions & 1 deletion ckanext/dcat/tests/test_scheming_support.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest

from rdflib.namespace import RDF
from rdflib.term import URIRef
from geomet import wkt

from ckan.tests import factories
from ckan.tests.helpers import call_action
Expand All @@ -20,10 +20,15 @@
LOCN,
GSP,
OWL,
GEOJSON_IMT,
)
from ckanext.dcat.tests.utils import BaseSerializeTest, BaseParseTest


# TODO: tests for spatial coverage
# TODO: index "spatial" extra


@pytest.mark.usefixtures("with_plugins", "clean_db")
@pytest.mark.ckan_config("ckan.plugins", "dcat scheming_datasets")
@pytest.mark.ckan_config(
Expand Down Expand Up @@ -84,6 +89,37 @@ def test_e2e_ckan_to_dcat(self):
{"start": "1905-03-01", "end": "2013-01-05"},
{"start": "2024-04-10", "end": "2024-05-29"},
],
"spatial_coverage": [
{
"geom": {
"type": "Polygon",
"coordinates": [
[
[11.9936, 54.0486],
[11.9936, 54.2466],
[12.3045, 54.2466],
[12.3045, 54.0486],
[11.9936, 54.0486],
]
],
},
"text": "Tarragona",
"uri": "https://sws.geonames.org/6361390/",
"bbox": {
"type": "Polygon",
"coordinates": [
[
[-2.1604, 42.7611],
[-2.0938, 42.7611],
[-2.0938, 42.7931],
[-2.1604, 42.7931],
[-2.1604, 42.7611],
]
],
},
"centroid": {"type": "Point", "coordinates": [1.26639, 41.12386]},
}
],
"resources": [
{
"name": "Resource 1",
Expand Down Expand Up @@ -257,6 +293,29 @@ def test_e2e_ckan_to_dcat(self):
data_type=XSD.dateTime,
)

spatial = [t for t in g.triples((dataset_ref, DCT.spatial, None))]
assert len(spatial) == len(dataset["spatial_coverage"])
assert str(spatial[0][2]) == dataset["spatial_coverage"][0]["uri"]
assert self._triple(g, spatial[0][2], RDF.type, DCT.Location)
assert self._triple(
g, spatial[0][2], SKOS.prefLabel, dataset["spatial_coverage"][0]["text"]
)

assert len([t for t in g.triples((spatial[0][2], LOCN.geometry, None))]) == 2
# Geometry in GeoJSON
assert self._triple(
g,
spatial[0][2],
LOCN.geometry,
dataset["spatial_coverage"][0]["geom"],
GEOJSON_IMT,
)
# Geometry in WKT
wkt_geom = wkt.dumps(
dataset["spatial_coverage"][0]["geom"], decimals=4
)
assert self._triple(g, spatial[0][2], LOCN.geometry, wkt_geom, GSP.wktLiteral)

distribution_ref = self._triple(g, dataset_ref, DCAT.distribution, None)[2]

# Resources: core fields
Expand Down

0 comments on commit 103aa08

Please sign in to comment.