Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Comparing changes

Choose two branches to see what's changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
...
Checking mergeability… Don't worry, you can still create the pull request.
  • 4 commits
  • 5 files changed
  • 0 commit comments
  • 1 contributor
View
3  akara.conf
@@ -132,7 +132,8 @@ MODULES = [
"dplaingestion.akamod.edan_to_dpla",
"dplaingestion.akamod.dpla-get-record",
"dplaingestion.akamod.primo-to-dpla",
- "dplaingestion.akamod.mwdl_enrich_state_located_in"
+ "dplaingestion.akamod.mwdl_enrich_state_located_in",
+ "dplaingestion.akamod.edan_select_id"
]
### Section 3: Other module configuration goes here
View
60 lib/akamod/edan_select_id.py
@@ -0,0 +1,60 @@
+import hashlib
+from amara.thirdparty import json
+from amara.lib.iri import is_absolute
+from akara.services import simple_service
+from akara.util import copy_headers_to_dict
+from akara import request, response, logger
+from dplaingestion.selector import getprop, setprop, exists
+
+COUCH_ID_BUILDER = lambda src, lname: "--".join((src,lname))
+COUCH_REC_ID_BUILDER = lambda src, id_handle: COUCH_ID_BUILDER(src,id_handle.strip().replace(" ","__"))
+
+@simple_service('POST', 'http://purl.org/la/dp/edan_select_id', 'edan_select_id', 'application/json')
+def selid(body,ctype,prop='descriptiveNonRepeating/record_link', alternative_prop='descriptiveNonRepeating/record_ID'):
+ '''
+ Service that accepts a JSON document and adds or sets the "id" property to the
+ value of the property named by the "prop" parameter
+ '''
+ tmpl="http://collections.si.edu/search/results.htm?q=record_ID%%3A%s&repo=DPLA"
+
+ if not prop:
+ response.code = 500
+ response.add_header('content-type','text/plain')
+ return "No id property has been selected"
+
+ try :
+ data = json.loads(body)
+ except:
+ response.code = 500
+ response.add_header('content-type','text/plain')
+ return "Unable to parse body as JSON"
+
+ request_headers = copy_headers_to_dict(request.environ)
+ source_name = request_headers.get('Source')
+
+ id = None
+
+ if exists(data, prop) or exists(data, alternative_prop):
+ v = getprop(data,prop, True)
+ if not v:
+ v = getprop(data, alternative_prop)
+ v = tmpl % v
+ if isinstance(v,basestring):
+ id = v
+ else:
+ if v:
+ for h in v:
+ if is_absolute(h):
+ id = h
+ if not id:
+ id = v[0]
+
+ if not id:
+ response.code = 500
+ response.add_header('content-type','text/plain')
+ return "No id property was found"
+
+ data[u'_id'] = COUCH_REC_ID_BUILDER(source_name, id)
+ data[u'id'] = hashlib.md5(data[u'_id']).hexdigest()
+
+ return json.dumps(data)
View
2  lib/akamod/edan_to_dpla.py
@@ -114,7 +114,7 @@ def source_transform(d):
def transform_is_shown_at(d):
propname = "descriptiveNonRepeating/record_link"
- obj = getprop(d, propname, False)
+ obj = getprop(d, propname, True)
return {"isShownAt": obj} if obj else {}
View
4 profiles/smithsonian.pjs
@@ -3,7 +3,7 @@
"name": "smithsonian",
"list_sets": "",
"enrichments_rec": [
- "/select-id?prop=descriptiveNonRepeating%2Frecord_link",
+ "/edan_select_id",
"/edan_to_dpla",
"/shred?prop=aggregatedCHO%2Fcontributor%2CaggregatedCHO%2Fcreator%2CaggregatedCHO%2Fdate",
"/shred?prop=aggregatedCHO%2Flanguage%2CaggregatedCHO%2Fpublisher%2CaggregatedCHO%2Frelation",
@@ -25,5 +25,5 @@
"name": "Smithsonian Institutions"
},
"type": "edan",
- "endpoint_URL": "file:/home/szymon/smithsonian/"
+ "endpoint_URL": "file:/home/szymon/smithsonian_demo/"
}
View
3  scripts/poll_profiles
@@ -288,6 +288,7 @@ def normalize_collection_name(collection_name):
import re
x = re.sub(r'[/() \t]', r'_', collection_name)
x = re.sub(r'_+', r'_', x)
+ x = re.sub(r':', r'_', x)
return x.lower()
@@ -329,7 +330,7 @@ def process_edan_all(profile, blacklist=None):
if not "#text" in c:
continue
- hid = c["#text"]
+ hid = normalize_collection_name(c["#text"])
htitle = c["#text"]
if hid not in collections:

No commit comments for this range

Something went wrong with that request. Please try again.