Skip to content
This repository has been archived by the owner on Dec 18, 2019. It is now read-only.

Commit

Permalink
dataProvider creation from spatial and "Repository:" substring cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
Aleksey Rosolovskiy authored and Aleksey Rosolovskiy committed Mar 1, 2013
1 parent da0d4e6 commit 455df46
Show file tree
Hide file tree
Showing 5 changed files with 226 additions and 2 deletions.
3 changes: 2 additions & 1 deletion akara.conf
Expand Up @@ -125,7 +125,8 @@ MODULES = [
"dplaingestion.akamod.georgia_identify_object",
"dplaingestion.akamod.bhl_contributor_to_collection",
"dplaingestion.akamod.copy_prop",
"dplaingestion.akamod.cleanup_value"
"dplaingestion.akamod.cleanup_value",
"dplaingestion.akamod.artstor_cleanup"
]

### Section 3: Other module configuration goes here
Expand Down
45 changes: 45 additions & 0 deletions lib/akamod/artstor_cleanup.py
@@ -0,0 +1,45 @@
"""
Artstor-specific module for cleaning data.
"""

__author__ = 'aleksey'

from akara import logger
from akara import response
from akara.services import simple_service
from amara.thirdparty import json

from dplaingestion import selector


# Status code set on the response when the request body cannot be processed.
HTTP_INTERNAL_SERVER_ERROR = 500
# Content type the service requires of incoming requests; also passed to
# the simple_service decorator.
HTTP_TYPE_JSON = 'application/json'
# Content type used for the plain-text error response.
HTTP_TYPE_TEXT = 'text/plain'
# Name of the HTTP header that carries the content type.
HTTP_HEADER_TYPE = 'Content-Type'


@simple_service('POST', 'http://purl.org/la/dp/artstor_cleanup', 'artstor_cleanup', HTTP_TYPE_JSON)
def artstor_cleanup(body, ctype):
    """
    Service that removes the "Repository:" label from Artstor dataProvider names.

    If the posted JSON document has a "dataProvider" property, every entry
    in it that is an object with a string "name" gets all occurrences of
    "Repository:" removed from that name and leading whitespace stripped.
    "dataProvider" may be a list of objects or a single object; entries of
    any other shape are left untouched.

    Arguments:
        body  - the request body; expected to be a JSON document
        ctype - the request content type; must equal application/json
                (compared case-insensitively)

    Returns the (possibly modified) document serialized as JSON. If the
    content type is wrong or the body cannot be parsed, the response code is
    set to 500, the response content type to text/plain, and the error text
    is returned instead.
    """

    try:
        # Explicit check instead of ``assert`` so that the validation is
        # not stripped when Python runs with -O.
        if ctype.lower() != HTTP_TYPE_JSON:
            raise ValueError("%s is not %s" % (HTTP_HEADER_TYPE, HTTP_TYPE_JSON))
        data = json.loads(body)
    except Exception as e:
        error_text = "Bad JSON: %s: %s" % (e.__class__.__name__, str(e))
        logger.exception(error_text)
        response.code = HTTP_INTERNAL_SERVER_ERROR
        response.add_header(HTTP_HEADER_TYPE, HTTP_TYPE_TEXT)
        return error_text

    data_provider_key = u"dataProvider"
    if selector.exists(data, data_provider_key):
        providers = selector.getprop(data, data_provider_key)

        # Treat a single provider object like a one-element list so that its
        # name is cleaned too (iterating a dict would only walk its keys).
        if isinstance(providers, dict):
            providers = [providers]

        if isinstance(providers, list):
            # (str, type(u"")) covers byte and unicode strings on Python 2
            # and collapses to plain str on Python 3.
            string_types = (str, type(u""))
            for item in providers:
                # Skip entries that are not objects (null, strings, numbers).
                if not isinstance(item, dict):
                    continue
                value = item.get("name")
                # Only strings can carry the label; leave other values alone.
                if isinstance(value, string_types):
                    item["name"] = value.replace("Repository:", "").lstrip()

    return json.dumps(data)


2 changes: 2 additions & 0 deletions profiles/artstor.pjs
Expand Up @@ -22,6 +22,8 @@
"http://localhost:8879/enrich-format",
"http://localhost:8879/artstor_identify_object",
"http://localhost:8879/filter_paths?paths=aggregatedCHO%2Fspatial%2CaggregatedCHO%2Frights%2CisShownAt%2Frights%2Cobject%2Frights",
"http://localhost:8879/copy_prop?prop=aggregatedCHO%2Fspatial&to_prop=dataProvider&create=True",
"http://localhost:8879/artstor_cleanup",
"http://localhost:8879/enrich_location"
],
"subresources": [
Expand Down
3 changes: 2 additions & 1 deletion test/server_support.py
Expand Up @@ -115,7 +115,8 @@ class Akara:
"dplaingestion.akamod.georgia_identify_object",
"dplaingestion.akamod.bhl_contributor_to_collection",
"dplaingestion.akamod.copy_prop",
"dplaingestion.akamod.cleanup_value"
"dplaingestion.akamod.cleanup_value",
"dplaingestion.akamod.artstor_cleanup"
]
class download_preview:
Expand Down
175 changes: 175 additions & 0 deletions test/test_artstor.py
Expand Up @@ -2,6 +2,7 @@

from amara.thirdparty import json
from nose.tools import nottest
from dict_differ import DictDiffer

def test_artstor_identify_object():
"""Fetching Artstor document thumbnail (schema v3)"""
Expand Down Expand Up @@ -267,5 +268,179 @@ def test_artstor_source_fetching():
assert FETCHED_SOURCE == EXPECTED_SOURCE


def test_artstor_data_provider_copy_prop():
    """
    copy_prop duplicates aggregatedCHO/spatial into a new dataProvider field
    """

    # The spatial entry of the posted document, expected verbatim in the
    # dataProvider field of the response.
    expected_provider = [
        {
            "state": "South Carolina",
            "name": "Repository: Columbia Museum of Art, Columbia, SC",
            "iso3166-2": "US-SC"
        }
    ]

    # Artstor record posted to the service as the request body.
    document = """
{
"_id": "artstor--oai:oaicat.oclc.org:AKRESS_10310356237",
"_rev": "1-48c7056794bbfbc0cb4f613e7c178c55",
"admin": {
"object_status": null
},
"@id": "http://dp.la/api/items/6ae54cee603f75c275fd913e04c49a3f",
"object": {
"rights": [
"Please note that if this image is under copyright, you may need to contact one or more copyright owners for any use that is not permitted under the ARTstor Terms and Conditions of Use or not otherwise permitted by law. While ARTstor tries to update contact information, it cannot guarantee that such information is always accurate. Determining whether those permissions are necessary, and obtaining such permissions, is your sole responsibility."
],
"@id": "http://media.artstor.net/imgstor/size2/kress/d0001/kress_1103_post.jpg",
"format": null
},
"aggregatedCHO": {
"rights": [
"Please note that if this image is under copyright, you may need to contact one or more copyright owners for any use that is not permitted under the ARTstor Terms and Conditions of Use or not otherwise permitted by law. While ARTstor tries to update contact information, it cannot guarantee that such information is always accurate. Determining whether those permissions are necessary, and obtaining such permissions, is your sole responsibility."
],
"title": "The Annunciation",
"creator": "Girolamo da Santa Croce, Italian, active 1503-1556",
"physicalMedium": [
"Paintings",
"Painting",
"55.6 x 71.1 cm",
"Oil on wood panel"
],
"relation": "",
"spatial": [
{
"state": "South Carolina",
"name": "Repository: Columbia Museum of Art, Columbia, SC",
"iso3166-2": "US-SC"
}
],
"date": {
"begin": "1540",
"end": "1540",
"displayDate": "c. 1540"
},
"type": null,
"subject": [
{
"name": "Annunciation: Mary, Usually Reading, Is Visited by the Angel"
}
]
},
"ingestDate": "2013-03-01T16:58:52.506790",
"collection": {
"@id": "http://dp.la/api/collections/artstor--SetDPLA",
"name": "SetDPLA",
"description": "SetDPLA"
},
"isShownAt": {
"@id": "http://www.artstor.org/artstor/ViewImages?id=8DtZYyMmJloyLyw7eDt5QHgt&userId=gDBAdA%3D%3D",
"format": null
},
"provider": {
"@id": "http://dp.la/api/contributor/artstor",
"name": "ARTstor OAICatMuseum"
},
"@context": {
"begin": {
"@id": "dpla:dateRangeStart",
"@type": "xsd:date"
},
"@vocab": "http://purl.org/dc/terms/",
"hasView": "edm:hasView",
"name": "xsd:string",
"object": "edm:object",
"aggregatedCHO": "edm:aggregatedCHO",
"dpla": "http://dp.la/terms/",
"collection": "dpla:aggregation",
"edm": "http://www.europeana.eu/schemas/edm/",
"state": "dpla:state",
"aggregatedDigitalResource": "dpla:aggregatedDigitalResource",
"coordinates": "dpla:coordinates",
"isShownAt": "edm:isShownAt",
"provider": "edm:provider",
"stateLocatedIn": "dpla:stateLocatedIn",
"end": {
"@id": "dpla:dateRangeEnd",
"@type": "xsd:date"
},
"dataProvider": "edm:dataProvider",
"originalRecord": "dpla:originalRecord",
"LCSH": "http://id.loc.gov/authorities/subjects"
},
"ingestType": "item",
"originalRecord": {
"rights": [
"",
"Please note that if this image is under copyright, you may need to contact one or more copyright owners for any use that is not permitted under the ARTstor Terms and Conditions of Use or not otherwise permitted by law. While ARTstor tries to update contact information, it cannot guarantee that such information is always accurate. Determining whether those permissions are necessary, and obtaining such permissions, is your sole responsibility."
],
"handle": [
"Thumbnail: http://media.artstor.net/imgstor/size2/kress/d0001/kress_1103_post.jpg",
"Image View: http://www.artstor.org/artstor/ViewImages?id=8DtZYyMmJloyLyw7eDt5QHgt&userId=gDBAdA%3D%3D",
"Ranking: 13000"
],
"creator": "Girolamo da Santa Croce, Italian, active 1503-1556",
"format": [
"55.6 x 71.1 cm",
"Oil on wood panel"
],
"label": "The Annunciation",
"datestamp": "2011-11-07",
"relation": "",
"coverage": [
"",
"Repository: Columbia Museum of Art, Columbia, SC"
],
"date": "c. 1540",
"title": "The Annunciation",
"type": [
"Paintings",
"Painting"
],
"id": "oai:oaicat.oclc.org:AKRESS_10310356237",
"subject": "Annunciation: Mary, Usually Reading, Is Visited by the Angel"
},
"id": "6ae54cee603f75c275fd913e04c49a3f"
}
"""

    endpoint = server() + "copy_prop?prop=aggregatedCHO%2Fspatial&to_prop=dataProvider&create=True"
    resp, content = H.request(endpoint, "POST", body=document)

    # Any 2xx status counts as success.
    status_text = str(resp.status)
    assert status_text.startswith("2")

    result = json.loads(content)
    assert "dataProvider" in result, "No dataProvider field in document"
    assert result["dataProvider"] == expected_provider


def test_artstor_cleanup_data_provider():
    """
    artstor_cleanup drops the "Repository:" label from dataProvider names
    """

    def provider_document(name):
        # Input and expected documents differ only in the provider name.
        return {
            "dataProvider": [
                {
                    "state": "South Carolina",
                    "name": name,
                    "iso3166-2": "US-SC"
                }
            ]
        }

    posted = provider_document("Repository: Columbia Museum of Art, Columbia, SC")
    wanted = provider_document("Columbia Museum of Art, Columbia, SC")

    endpoint = server() + "artstor_cleanup"
    resp, content = H.request(endpoint, "POST", body=json.dumps(posted))

    # Any 2xx status counts as success.
    status_text = str(resp.status)
    assert status_text.startswith("2")

    result = json.loads(content)
    assert result["dataProvider"] == wanted["dataProvider"], DictDiffer(wanted, result).diff()


# Running this file directly exits immediately with a hint to use nosetests.
if __name__ == "__main__":
    raise SystemExit("Use nosetests")

0 comments on commit 455df46

Please sign in to comment.