Skip to content

Commit

Permalink
[#56] Store geometry in spatial field for indexing
Browse files Browse the repository at this point in the history
If the `spatial_coverage` field is present, store the first geometry
found so ckanext-spatial can pick it up for spatial indexing.

Added indexing tests
  • Loading branch information
amercader committed May 30, 2024
1 parent aa23a70 commit 4256e73
Show file tree
Hide file tree
Showing 2 changed files with 111 additions and 7 deletions.
36 changes: 32 additions & 4 deletions ckanext/dcat/plugins/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from builtins import object
import os
import json

from ckantoolkit import config

Expand Down Expand Up @@ -146,6 +147,7 @@ def before_dataset_index(self, dataset_dict):
except KeyError:
pass

spatial = None
if schema:
for field in schema['dataset_fields']:
if field['field_name'] in dataset_dict and 'repeating_subfields' in field:
Expand All @@ -156,10 +158,36 @@ def before_dataset_index(self, dataset_dict):
# Index a flattened version
new_key = f'{field["field_name"]}__{key}'
if not dataset_dict.get(new_key):
dataset_dict[new_key] = ""
dataset_dict[new_key] += " " + value

dataset_dict.pop(field['field_name'], None)
dataset_dict[new_key] = value
else:
dataset_dict[new_key] += ' ' + value

subfields = dataset_dict.pop(field['field_name'], None)
if field['field_name'] == 'spatial_coverage':
spatial = subfields

# Store the first geometry found so ckanext-spatial can pick it up for indexing
def _check_for_a_geom(spatial_dict):
    """Return the first usable geometry of a spatial coverage entry.

    The ``geom``, ``bbox`` and ``centroid`` keys are checked in that
    order and the first non-empty value wins. Dict values are serialized
    to a JSON string; any other value is returned unchanged.

    :param spatial_dict: a single item of the ``spatial_coverage`` field
    :returns: the geometry value, or ``None`` if no usable one was found
    """
    for field in ('geom', 'bbox', 'centroid'):
        value = spatial_dict.get(field)
        if not value:
            continue

        if not isinstance(value, dict):
            # Already a flat value: stop here so that a lower priority
            # field can not override it
            return value

        try:
            return json.dumps(value)
        except (TypeError, ValueError):
            # json.dumps raises TypeError for unserializable objects and
            # ValueError for circular references. Try the next field
            # rather than handing back a raw dict.
            continue

    return None

if spatial and not dataset_dict.get('spatial'):
for item in spatial:
value = _check_for_a_geom(item)
if value:
dataset_dict['spatial'] = value
dataset_dict['extras_spatial'] = value
break

return dataset_dict

Expand Down
82 changes: 79 additions & 3 deletions ckanext/dcat/tests/test_scheming_support.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from unittest import mock
import json

import pytest
from rdflib.namespace import RDF
from rdflib.term import URIRef
Expand Down Expand Up @@ -311,9 +314,7 @@ def test_e2e_ckan_to_dcat(self):
GEOJSON_IMT,
)
# Geometry in WKT
wkt_geom = wkt.dumps(
dataset["spatial_coverage"][0]["geom"], decimals=4
)
wkt_geom = wkt.dumps(dataset["spatial_coverage"][0]["geom"], decimals=4)
assert self._triple(g, spatial[0][2], LOCN.geometry, wkt_geom, GSP.wktLiteral)

distribution_ref = self._triple(g, dataset_ref, DCAT.distribution, None)[2]
Expand Down Expand Up @@ -606,3 +607,78 @@ def test_e2e_dcat_to_ckan(self):
assert resource["access_services"][0]["endpoint_url"] == [
"http://publications.europa.eu/webapi/rdf/sparql"
]


@pytest.mark.usefixtures("with_plugins", "clean_db")
@pytest.mark.ckan_config("ckan.plugins", "dcat scheming_datasets")
@pytest.mark.ckan_config(
    "scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_2.1.yaml"
)
@pytest.mark.ckan_config("scheming.presets", "ckanext.scheming:presets.json")
@pytest.mark.ckan_config(
    "ckanext.dcat.rdf.profiles", "euro_dcat_ap_2 euro_dcat_ap_scheming"
)
class TestSchemingIndexFields:
    """Check the document handed to the search index on dataset creation.

    Both tests patch ``ckan.lib.search.index.make_connection`` and inspect
    the ``docs`` keyword argument recorded on the mock.
    """

    def test_repeating_subfields_index(self):
        """Repeating subfield values end up space-joined under ``<field>__<key>``."""
        contacts = [
            {"name": "Contact 1", "email": "contact1@example.org"},
            {"name": "Contact 2", "email": "contact2@example.org"},
        ]
        package = {
            # Core fields
            "name": "test-dataset",
            "title": "Test DCAT dataset",
            "notes": "Some notes",
            # Repeating subfields
            "contact": contacts,
        }

        with mock.patch("ckan.lib.search.index.make_connection") as connection:
            call_action("package_create", **package)

        # Dict sent to Solr. The second recorded call is the one carrying
        # ``docs`` (presumably the add to the index; the index is fixed by
        # the current call sequence).
        indexed = connection.mock_calls[1].kwargs["docs"][0]

        assert indexed["contact__name"] == "Contact 1 Contact 2"
        expected_emails = "contact1@example.org contact2@example.org"
        assert indexed["contact__email"] == expected_emails

    def test_spatial_field(self):
        """The first geometry in ``spatial_coverage`` is indexed as ``spatial``."""
        centroid = {"type": "Point", "coordinates": [1.26639, 41.12386]}
        polygon = {
            "type": "Polygon",
            "coordinates": [
                [
                    [11.9936, 54.0486],
                    [11.9936, 54.2466],
                    [12.3045, 54.2466],
                    [12.3045, 54.0486],
                    [11.9936, 54.0486],
                ]
            ],
        }
        package = {
            # Core fields
            "name": "test-dataset",
            "title": "Test DCAT dataset",
            "notes": "Some notes",
            "spatial_coverage": [
                {
                    "uri": "https://sws.geonames.org/6361390/",
                    "centroid": centroid,
                },
                {
                    "geom": polygon,
                    "text": "Tarragona",
                },
            ],
        }

        with mock.patch("ckan.lib.search.index.make_connection") as connection:
            call_action("package_create", **package)

        # Dict sent to Solr
        indexed = connection.mock_calls[1].kwargs["docs"][0]

        # The first coverage item only has a centroid, so that is the
        # geometry expected in the index, serialized as JSON
        assert indexed["spatial"] == json.dumps(centroid)

0 comments on commit 4256e73

Please sign in to comment.