Skip to content

Commit

Permalink
[#56] Add rest of DCAT-AP 1 and 2.1 fields
Browse files Browse the repository at this point in the history
At least the ones supported by the current processors.

TODO:
* spatial_resolution_in_meters: needs a new multiple_text_decimal validator (noted as scheming_multiple_decimal in the schema TODO)
* hvd_category: will be done as part of the wider HVD work
  • Loading branch information
amercader committed Jun 3, 2024
1 parent 4256e73 commit afb74d1
Show file tree
Hide file tree
Showing 3 changed files with 176 additions and 22 deletions.
56 changes: 56 additions & 0 deletions ckanext/dcat/schemas/dcat_ap_2.1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,11 @@ dataset_fields:
label: End
# TODO: dcat_date preset

- field_name: temporal_resolution
label: Temporal resolution
preset: multiple_text
validators: ignore_missing scheming_multiple_text

- field_name: spatial_coverage
label: Spatial coverage
repeating_subfields:
Expand All @@ -139,6 +144,12 @@ dataset_fields:
- field_name: centroid
label: Centroid

#- field_name: spatial_resolution_in_meters
# label: Spatial resolution in meters
# preset: multiple_text
# validators: ignore_missing scheming_multiple_text
# TODO: scheming_multiple_decimal

- field_name: access_rights
label: Access rights
validators: ignore_missing unicode_safe
Expand Down Expand Up @@ -175,6 +186,23 @@ dataset_fields:
preset: multiple_text
validators: ignore_missing scheming_multiple_text

- field_name: is_referenced_by
label: Is referenced by
preset: multiple_text
validators: ignore_missing scheming_multiple_text

- field_name: applicable_legislation
label: Applicable legislation
preset: multiple_text
validators: ignore_missing scheming_multiple_text

#- field_name: hvd_category
# label: HVD Category
# preset: multiple_text
# validators: ignore_missing scheming_multiple_text
# TODO: implement separately as part of wider HVD support


# Note: if not provided, this will be autogenerated
- field_name: uri
label: URI
Expand All @@ -199,15 +227,37 @@ resource_fields:
label: Format
preset: resource_format_autocomplete

- field_name: mimetype
label: Media type
# TODO: get from format

- field_name: compress_format
label: Compress format
# TODO: media type validator

- field_name: package_format
label: Package format
# TODO: media type validator

- field_name: size
label: Size
# TODO: number validator / snippet

- field_name: hash
label: Hash
# TODO: generate for uploads?

- field_name: hash_algorithm
label: Hash Algorithm

- field_name: rights
label: Rights
form_snippet: markdown.html
form_placeholder: Some statement about the rights associated with the resource

- field_name: availability
label: Availability

- field_name: status
label: Status

Expand All @@ -233,6 +283,7 @@ resource_fields:
- field_name: language
label: Language
preset: multiple_text
validators: ignore_missing scheming_multiple_text

- field_name: documentation
label: Documentation
Expand All @@ -244,6 +295,11 @@ resource_fields:
preset: multiple_text
validators: ignore_missing scheming_multiple_text

- field_name: applicable_legislation
label: Applicable legislation
preset: multiple_text
validators: ignore_missing scheming_multiple_text

- field_name: access_services
label: Access services
repeating_label: Access service
Expand Down
131 changes: 109 additions & 22 deletions ckanext/dcat/tests/test_scheming_support.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from ckanext.dcat.processors import RDFSerializer, RDFParser
from ckanext.dcat.profiles import (
DCAT,
DCATAP,
DCT,
ADMS,
XSD,
Expand All @@ -24,6 +25,7 @@
GSP,
OWL,
GEOJSON_IMT,
SPDX,
)
from ckanext.dcat.tests.utils import BaseSerializeTest, BaseParseTest

Expand Down Expand Up @@ -75,6 +77,14 @@ def test_e2e_ckan_to_dcat(self):
"language": ["en", "ca", "es"],
"documentation": ["https://example.org/some-doc.html"],
"conforms_to": ["Standard 1", "Standard 2"],
"is_referenced_by": [
"https://doi.org/10.1038/sdata.2018.22",
"test_isreferencedby",
],
"applicable_legislation": [
"http://data.europa.eu/eli/reg_impl/2023/138/oj",
"http://data.europa.eu/eli/reg_impl/2023/138/oj_alt",
],
# Repeating subfields
"contact": [
{"name": "Contact 1", "email": "contact1@example.org"},
Expand All @@ -92,6 +102,7 @@ def test_e2e_ckan_to_dcat(self):
{"start": "1905-03-01", "end": "2013-01-05"},
{"start": "2024-04-10", "end": "2024-05-29"},
],
"temporal_resolution": ["PT15M", "P1D"],
"spatial_coverage": [
{
"geom": {
Expand Down Expand Up @@ -123,12 +134,19 @@ def test_e2e_ckan_to_dcat(self):
"centroid": {"type": "Point", "coordinates": [1.26639, 41.12386]},
}
],
"spatial_resolution_in_meters": [1.5, 2.0],
"resources": [
{
"name": "Resource 1",
"description": "Some description",
"url": "https://example.com/data.csv",
"format": "CSV",
"availability": "http://publications.europa.eu/resource/authority/planned-availability/EXPERIMENTAL",
"compress_format": "http://www.iana.org/assignments/media-types/application/gzip",
"package_format": "http://publications.europa.eu/resource/authority/file-type/TAR",
"size": 12323,
"hash": "4304cf2e751e6053c90b1804c89c0ebb758f395a",
"hash_algorithm": "http://spdx.org/rdf/terms#checksumAlgorithm_sha1",
"status": "published",
"access_url": "https://example.com/data.csv",
"download_url": "https://example.com/data.csv",
Expand Down Expand Up @@ -214,6 +232,24 @@ def test_e2e_ckan_to_dcat(self):
self._triples_list_values(g, dataset_ref, FOAF.page)
== dataset["documentation"]
)
assert (
self._triples_list_values(g, dataset_ref, DCAT.temporalResolution)
== dataset["temporal_resolution"]
)
assert (
self._triples_list_values(g, dataset_ref, DCT.isReferencedBy)
== dataset["is_referenced_by"]
)
assert (
self._triples_list_values(g, dataset_ref, DCATAP.applicableLegislation)
== dataset["applicable_legislation"]
)

# TODO: enable after validator
# assert (
# self._triples_list_values(g, dataset_ref, DCAT.spatialResolutionInMeters)
# == dataset["spatial_resolution_in_meters"]
# )

# Repeating subfields

Expand Down Expand Up @@ -318,38 +354,67 @@ def test_e2e_ckan_to_dcat(self):
assert self._triple(g, spatial[0][2], LOCN.geometry, wkt_geom, GSP.wktLiteral)

distribution_ref = self._triple(g, dataset_ref, DCAT.distribution, None)[2]
resource = dataset_dict["resources"][0]

# Resources: core fields

assert self._triple(
g, distribution_ref, DCT.title, dataset_dict["resources"][0]["name"]
)
assert self._triple(g, distribution_ref, DCT.title, resource["name"])
assert self._triple(
g,
distribution_ref,
DCT.description,
dataset_dict["resources"][0]["description"],
resource["description"],
)

# Resources: standard fields

assert self._triple(g, distribution_ref, DCT.rights, resource["rights"])
assert self._triple(g, distribution_ref, ADMS.status, resource["status"])
assert self._triple(
g, distribution_ref, DCT.rights, dataset_dict["resources"][0]["rights"]
g,
distribution_ref,
DCAT.accessURL,
URIRef(resource["access_url"]),
)
assert self._triple(
g, distribution_ref, ADMS.status, dataset_dict["resources"][0]["status"]
g,
distribution_ref,
DCATAP.availability,
URIRef(resource["availability"]),
)
assert self._triple(
g,
distribution_ref,
DCAT.accessURL,
URIRef(dataset_dict["resources"][0]["access_url"]),
DCAT.compressFormat,
URIRef(resource["compress_format"]),
)
assert self._triple(
g,
distribution_ref,
DCAT.packageFormat,
URIRef(resource["package_format"]),
)
assert self._triple(
g,
distribution_ref,
DCAT.downloadURL,
URIRef(dataset_dict["resources"][0]["download_url"]),
URIRef(resource["download_url"]),
)

assert self._triple(g, distribution_ref, DCAT.byteSize, float(resource['size']), XSD.decimal)
# Checksum
checksum = self._triple(g, distribution_ref, SPDX.checksum, None)[2]
assert checksum
assert self._triple(g, checksum, RDF.type, SPDX.Checksum)
assert self._triple(
g,
checksum,
SPDX.checksumValue,
resource["hash"],
data_type="http://www.w3.org/2001/XMLSchema#hexBinary",
)
assert self._triple(
g, checksum, SPDX.algorithm, URIRef(resource["hash_algorithm"])
)

# Resources: dates
Expand All @@ -369,11 +434,10 @@ def test_e2e_ckan_to_dcat(self):
)

# Resources: list fields

language = [
str(t[2]) for t in g.triples((distribution_ref, DCT.language, None))
]
assert language == dataset_dict["resources"][0]["language"]
assert (
self._triples_list_values(g, distribution_ref, DCT.language)
== resource["language"]
)

# Resource: repeating subfields
access_services = [
Expand All @@ -385,17 +449,14 @@ def test_e2e_ckan_to_dcat(self):
g,
access_services[0][2],
DCT.title,
dataset_dict["resources"][0]["access_services"][0]["title"],
resource["access_services"][0]["title"],
)

endpoint_urls = [
str(t[2])
for t in g.triples((access_services[0][2], DCAT.endpointURL, None))
]
assert (
endpoint_urls
== dataset_dict["resources"][0]["access_services"][0]["endpoint_url"]
)
assert endpoint_urls == resource["access_services"][0]["endpoint_url"]

def test_publisher_fallback_org(self):

Expand Down Expand Up @@ -555,7 +616,18 @@ def test_e2e_dcat_to_ckan(self):
"http://dataset.info.org/doc1",
"http://dataset.info.org/doc2",
]

assert sorted(dataset["temporal_resolution"]) == [
"P1D",
"PT15M",
]
assert sorted(dataset["is_referenced_by"]) == [
"https://doi.org/10.1038/sdata.2018.22",
"test_isreferencedby",
]
assert sorted(dataset["applicable_legislation"]) == [
"http://data.europa.eu/eli/reg_impl/2023/138/oj",
"http://data.europa.eu/eli/reg_impl/2023/138/oj_alt",
]
# Repeating subfields

assert dataset["contact"][0]["name"] == "Point of Contact"
Expand Down Expand Up @@ -585,9 +657,24 @@ def test_e2e_dcat_to_ckan(self):
assert resource["modified"] == "2012-05-01T00:04:06"
assert resource["status"] == "http://purl.org/adms/status/Completed"
assert resource["size"] == 12323
assert (
resource["availability"]
== "http://publications.europa.eu/resource/authority/planned-availability/EXPERIMENTAL"
)
assert (
resource["compress_format"]
== "http://www.iana.org/assignments/media-types/application/gzip"
)
assert (
resource["package_format"]
== "http://publications.europa.eu/resource/authority/file-type/TAR"
)

# assert resource['hash'] == u'4304cf2e751e6053c90b1804c89c0ebb758f395a'
# assert resource['hash_algorithm'] == u'http://spdx.org/rdf/terms#checksumAlgorithm_sha1'
assert resource["hash"] == "4304cf2e751e6053c90b1804c89c0ebb758f395a"
assert (
resource["hash_algorithm"]
== "http://spdx.org/rdf/terms#checksumAlgorithm_sha1"
)

assert (
resource["access_url"] == "http://www.bgs.ac.uk/gbase/geochemcd/home.html"
Expand Down
11 changes: 11 additions & 0 deletions examples/dataset.rdf
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@
<dct:conformsTo>Standard 2</dct:conformsTo>
<dct:license rdf:resource="https://data.some.org/link/to/license"/>
<dct:spatial rdf:resource="http://publications.europa.eu/mdr/authority/country/ZWE"/>
<dcat:spatialResolutionInMeters rdf:datatype="http://www.w3.org/2001/XMLSchema#decimal">1.5</dcat:spatialResolutionInMeters>
<dcat:spatialResolutionInMeters rdf:datatype="http://www.w3.org/2001/XMLSchema#decimal">2.0</dcat:spatialResolutionInMeters>
<dct:accrualPeriodicity rdf:resource="http://purl.org/cld/freq/daily"/>
<dct:accessRights>public</dct:accessRights>
<foaf:page rdf:resource="http://dataset.info.org/doc1"/>
Expand All @@ -50,13 +52,19 @@
<dct:isVersionOf rdf:resource="https://data.some.org/catalog/datasets/original-dataset"/>
<dct:source rdf:resource="https://data.some.org/catalog/datasets/source-dataset-1"/>
<dct:source rdf:resource="https://data.some.org/catalog/datasets/source-dataset-2"/>
<dct:isReferencedBy>https://doi.org/10.1038/sdata.2018.22</dct:isReferencedBy>
<dct:isReferencedBy>test_isreferencedby</dct:isReferencedBy>
<dcatap:applicableLegislation rdf:resource="http://data.europa.eu/eli/reg_impl/2023/138/oj" />
<dcatap:applicableLegislation rdf:resource="http://data.europa.eu/eli/reg_impl/2023/138/oj_alt" />
<adms:sample rdf:resource="https://data.some.org/catalog/datasets/9df8df51-63db-37a8-e044-0003ba9b0d98/sample"/>
<dct:temporal>
<dct:PeriodOfTime>
<schema:startDate rdf:datatype="http://www.w3.org/2001/XMLSchema#date">1905-03-01</schema:startDate>
<schema:endDate rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2013-01-05</schema:endDate>
</dct:PeriodOfTime>
</dct:temporal>
<dcat:temporalResolution rdf:datatype="http://www.w3.org/2001/XMLSchema#duration">PT15M</dcat:temporalResolution>
<dcat:temporalResolution rdf:datatype="http://www.w3.org/2001/XMLSchema#duration">P1D</dcat:temporalResolution>
<dcat:contactPoint>
<vcard:Organization>
<vcard:fn>Point of Contact</vcard:fn>
Expand All @@ -80,9 +88,12 @@
<dct:license rdf:resource="http://creativecommons.org/licenses/by-nc/2.0/"/>
<dct:rights>Some statement about rights</dct:rights>
<adms:status rdf:resource="http://purl.org/adms/status/Completed"/>
<dcatap:availability>http://publications.europa.eu/resource/authority/planned-availability/EXPERIMENTAL</dcatap:availability>
<dcat:accessURL rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">http://www.bgs.ac.uk/gbase/geochemcd/home.html</dcat:accessURL>
<dct:format>HTML</dct:format>
<dcat:mediaType>text/html</dcat:mediaType>
<dcat:compressFormat>http://www.iana.org/assignments/media-types/application/gzip</dcat:compressFormat>
<dcat:packageFormat>http://publications.europa.eu/resource/authority/file-type/TAR</dcat:packageFormat>
<dcat:byteSize>12323</dcat:byteSize>
<foaf:page rdf:resource="http://dataset.info.org/distribution1/doc1"/>
<foaf:page rdf:resource="http://dataset.info.org/distribution1/doc2"/>
Expand Down

0 comments on commit afb74d1

Please sign in to comment.