Skip to content

Commit

Permalink
[#56] Add sample of resource fields
Browse files: browse the repository at this point in the history
  • Loading branch information
amercader committed May 20, 2024
1 parent 2d8d969 commit c5865fb
Show file tree
Hide file tree
Showing 3 changed files with 149 additions and 50 deletions.
86 changes: 46 additions & 40 deletions ckanext/dcat/profiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -1752,47 +1752,53 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
]
self._add_list_triples_from_dict(resource_dict, distribution, items)

try:
access_service_list = json.loads(resource_dict.get('access_services', '[]'))
# Access service
for access_service_dict in access_service_list:

access_service_uri = access_service_dict.get('uri')
if access_service_uri:
access_service_node = CleanedURIRef(access_service_uri)
else:
access_service_node = BNode()
# Remember the (internal) access service reference for referencing in
# further profiles
access_service_dict['access_service_ref'] = str(access_service_node)

self.g.add((distribution, DCAT.accessService, access_service_node))

self.g.add((access_service_node, RDF.type, DCAT.DataService))

# Simple values
items = [
('availability', DCATAP.availability, None, URIRefOrLiteral),
('license', DCT.license, None, URIRefOrLiteral),
('access_rights', DCT.accessRights, None, URIRefOrLiteral),
('title', DCT.title, None, Literal),
('endpoint_description', DCAT.endpointDescription, None, Literal),
('description', DCT.description, None, Literal),
]

self._add_triples_from_dict(access_service_dict, access_service_node, items)
# TODO: this will go into a separate profile

# Lists
items = [
('endpoint_url', DCAT.endpointURL, None, URIRefOrLiteral),
('serves_dataset', DCAT.servesDataset, None, URIRefOrLiteral),
]
self._add_list_triples_from_dict(access_service_dict, access_service_node, items)

if access_service_list:
resource_dict['access_services'] = json.dumps(access_service_list)
except ValueError:
pass
access_service_list = resource_dict.get('access_services', [])
if isinstance(access_service_list, str):
try:
access_service_list = json.loads(access_service_list)
except ValueError:
access_service_list = []

# Access service
for access_service_dict in access_service_list:

access_service_uri = access_service_dict.get('uri')
if access_service_uri:
access_service_node = CleanedURIRef(access_service_uri)
else:
access_service_node = BNode()
# Remember the (internal) access service reference for referencing in
# further profiles
access_service_dict['access_service_ref'] = str(access_service_node)

self.g.add((distribution, DCAT.accessService, access_service_node))

self.g.add((access_service_node, RDF.type, DCAT.DataService))

# Simple values
items = [
('availability', DCATAP.availability, None, URIRefOrLiteral),
('license', DCT.license, None, URIRefOrLiteral),
('access_rights', DCT.accessRights, None, URIRefOrLiteral),
('title', DCT.title, None, Literal),
('endpoint_description', DCAT.endpointDescription, None, Literal),
('description', DCT.description, None, Literal),
]

self._add_triples_from_dict(access_service_dict, access_service_node, items)

# Lists
items = [
('endpoint_url', DCAT.endpointURL, None, URIRefOrLiteral),
('serves_dataset', DCAT.servesDataset, None, URIRefOrLiteral),
]
self._add_list_triples_from_dict(access_service_dict, access_service_node, items)

# TODO: re-enable when separating into a profile
# if access_service_list:
# resource_dict['access_services'] = json.dumps(access_service_list)

def graph_from_catalog(self, catalog_dict, catalog_ref):

Expand Down
25 changes: 25 additions & 0 deletions ckanext/dcat/schemas/dcat_ap_2.1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ dataset_fields:
- field_name: conforms_to
label: Conforms to
preset: multiple_text
validators: ignore_missing scheming_multiple_text


resource_fields:
Expand All @@ -114,3 +115,27 @@ resource_fields:
- field_name: format
label: Format
preset: resource_format_autocomplete

- field_name: rights
label: Rights
form_snippet: markdown.html
form_placeholder: Some statement about the rights associated with the resource

- field_name: language
label: Language
preset: multiple_text

- field_name: access_services
label: Access services
repeating_label: Access service
repeating_subfields:

- field_name: uri
label: URI

- field_name: title
label: Title

- field_name: endpoint_url
label: Endpoint URL
preset: multiple_text
88 changes: 78 additions & 10 deletions ckanext/dcat/tests/test_scheming_support.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,24 @@ def test_e2e_ckan_to_dcat(self):
{"name": "Contact 1", "email": "contact1@example.org"},
{"name": "Contact 2", "email": "contact2@example.org"},
],
# TODO: resources
"resources": [
{
"name": "Resource 1",
"url": "https://example.com/data.csv",
"format": "CSV",
"rights": "Some stament about rights",
"language": ["en", "ca", "es"],
"access_services": [
{
"title": "Access Service 1",
"endpoint_url": [
"https://example.org/access_service/1",
"https://example.org/access_service/2",
],
}
],
}
],
}

dataset = call_action("package_create", **dataset_dict)
Expand All @@ -64,7 +81,7 @@ def test_e2e_ckan_to_dcat(self):
assert dataset["conforms_to"][0] == "Standard 1"
assert dataset["contact"][0]["name"] == "Contact 1"

s = RDFSerializer(profiles=["euro_dcat_ap"])
s = RDFSerializer(profiles=["euro_dcat_ap_2"])
g = s.g

dataset_ref = s.graph_from_dataset(dataset)
Expand All @@ -81,17 +98,68 @@ def test_e2e_ckan_to_dcat(self):

# List fields
# TODO helper function
conforms = [t for t in g.triples((dataset_ref, DCT.conformsTo, None))]
assert len(conforms) == len(dataset["conforms_to"])
for index, item in enumerate(conforms):
assert str(item[2]) == dataset["conforms_to"][index]
conforms_to = [
str(t[2]) for t in g.triples((dataset_ref, DCT.conformsTo, None))
]
assert conforms_to == dataset["conforms_to"]

# Repeating subfields

contact_details = [t for t in g.triples((dataset_ref, DCAT.contactPoint, None))]

assert len(contact_details) == len(dataset["contact"])
self._triple(g, contact_details[0][2], VCARD.fn, dataset_dict["contact"][0]["name"])
self._triple(g, contact_details[0][2], VCARD.hasEmail, dataset_dict["contact"][0]["email"])
self._triple(g, contact_details[1][2], VCARD.fn, dataset_dict["contact"][1]["name"])
self._triple(g, contact_details[1][2], VCARD.hasEmail, dataset_dict["contact"][1]["email"])
self._triple(
g, contact_details[0][2], VCARD.fn, dataset_dict["contact"][0]["name"]
)
self._triple(
g,
contact_details[0][2],
VCARD.hasEmail,
dataset_dict["contact"][0]["email"],
)
self._triple(
g, contact_details[1][2], VCARD.fn, dataset_dict["contact"][1]["name"]
)
self._triple(
g,
contact_details[1][2],
VCARD.hasEmail,
dataset_dict["contact"][1]["email"],
)

distribution_ref = self._triple(g, dataset_ref, DCAT.distribution, None)[2]

# Resources: standard fields

assert self._triple(
g, distribution_ref, DCT.rights, dataset_dict["resources"][0]["rights"]
)

# Resources: list fields

language = [
str(t[2]) for t in g.triples((distribution_ref, DCT.language, None))
]
assert language == dataset_dict["resources"][0]["language"]

# Resource: repeating subfields
access_services = [
t for t in g.triples((distribution_ref, DCAT.accessService, None))
]

assert len(access_services) == len(dataset["resources"][0]["access_services"])
self._triple(
g,
access_services[0][2],
DCT.title,
dataset_dict["resources"][0]["access_services"][0]["title"],
)

endpoint_urls = [
str(t[2])
for t in g.triples((access_services[0][2], DCAT.endpointURL, None))
]
assert (
endpoint_urls
== dataset_dict["resources"][0]["access_services"][0]["endpoint_url"]
)

0 comments on commit c5865fb

Please sign in to comment.