Permalink
Browse files

Merge branch 'develop' into smithsonian_ingestion

  • Loading branch information...
2 parents f15842f + a85ca81 commit 51916603ea0390807a4726488f488f81a612036e Szymon Guz committed Mar 15, 2013
View
@@ -129,7 +129,10 @@ MODULES = [
"dplaingestion.akamod.sets_prop",
"dplaingestion.akamod.enrich_language",
"dplaingestion.akamod.arc-to-dpla",
- "dplaingestion.akamod.edan_to_dpla"
+ "dplaingestion.akamod.edan_to_dpla",
+ "dplaingestion.akamod.dpla-get-record",
+ "dplaingestion.akamod.primo-to-dpla",
+ "dplaingestion.akamod.mwdl_enrich_state_located_in"
]
### Section 3: Other module configuration goes here
@@ -180,3 +183,6 @@ class kentucky_identify_object(identify_object):
class artstor_identify_object(identify_object):
pass
+
+class georgia_identify_object(identify_object):
+ pass
@@ -19,15 +19,15 @@ def convert(data, prop):
if exists(data, prop):
v = getprop(data, prop)
if isinstance(v, basestring):
- setprop(data, prop, cleanup(v))
+ setprop(data, prop, cleanup(v, prop))
elif isinstance(v, list):
temp = []
for val in v:
- temp.append(cleanup(val))
+ temp.append(cleanup(val, prop))
setprop(data, prop, temp)
-def cleanup(value):
+def cleanup(value, prop):
""" Performs a cleanup of value using a bunch of regexps.
Arguments:
@@ -36,7 +36,10 @@ def cleanup(value):
Returns:
Converted string.
"""
- TAGS_FOR_STRIPPING = '[\.\' \r\t\n";,]*' # Tags for stripping at beginning and at the end.
+ # Do not remove double quotes from title
+ dquote = '' if prop == "aggregatedCHO/title" else '"'
+ # Tags for stripping at beginning and at the end.
+ TAGS_FOR_STRIPPING = '[\.\' \r\t\n;,%s]*' % dquote
REGEXPS = (' *-- *', '--'), \
('[\t ]{2,}', ' '), \
('^' + TAGS_FOR_STRIPPING, ''), \
View
@@ -7,7 +7,7 @@
@simple_service('POST', 'http://purl.org/la/dp/copy_prop', 'copy_prop',
'application/json')
def copyprop(body,ctype,prop=None,to_prop=None,create=False,key=None,
- remove=None):
+ remove=None,no_replace=None):
"""Copies value in one prop to another prop.
Keyword arguments:
@@ -18,6 +18,7 @@ def copyprop(body,ctype,prop=None,to_prop=None,create=False,key=None,
create -- creates to_prop if True (default False)
key -- the key to use if to_prop is a dict (default None)
remove -- removes prop if True (default False)
+ no_replace -- creates list of to_prop string and appends prop if True
"""
@@ -29,20 +30,27 @@ def copyprop(body,ctype,prop=None,to_prop=None,create=False,key=None,
return "Unable to parse body as JSON"
if exists(data, prop) and create and not exists(data, to_prop):
- setprop(data, to_prop, "")
+ val = {} if key else ""
+ setprop(data, to_prop, val)
if exists(data, prop) and exists(data, to_prop):
val = getprop(data, prop)
to_element = getprop(data, to_prop)
+
if isinstance(to_element, basestring):
+ if no_replace:
+ el = [to_element] if to_element else []
+ el.append(val)
+ # Flatten
+ val = [e for s in el for e in (s if not isinstance(s, basestring) else [s])]
setprop(data, to_prop, val)
else:
# If key is set, assume to_element is dict or list of dicts
if key:
if not isinstance(to_element, list):
to_element = [to_element]
for dict in to_element:
- if exists(dict, key):
+ if exists(dict, key) or create:
setprop(dict, key, val)
else:
msg = "Key %s does not exist in %s" % (key, to_prop)
@@ -0,0 +1,286 @@
+# -*- encoding: utf-8 -*-
+'''
+@ 2011 by Uche Ogbuji <uche@ogbuji.net>
+
+This file is part of the open source Akara project,
+provided under the Apache 2.0 license.
+See the files LICENSE and NOTICE for details.
+Project home, documentation, distributions: http://wiki.xml3k.org/Akara
+
+ Module name:: freemix_akara.oai
+
+Scrapes collections from an OAI site into JSON form for Freemix
+
+= Defined REST entry points =
+
+http://purl.org/com/zepheira/freemix/services/oai.json (freemix_akara.oai) Handles GET
+
+= Configuration =
+
+None
+
+= Notes on security =
+
+This makes heavy access to remote OAI sites
+
+= Notes =
+
+Adapted 2012 by Jeffrey Licht to support resumption tokens
+
+'''
+
+import sys, time
+
+from amara.thirdparty import json
+
+from akara.services import simple_service
+from akara import logger
+from akara import module_config
+
+from dplaingestion.oai import oaiservice
+import sys
+
+
+GETRECORD_SERVICE_ID = 'http://purl.org/la.dp/dpla-get-record'
+
@simple_service('GET', GETRECORD_SERVICE_ID, 'dpla-get-record', 'application/json')
def getrecord(endpoint, id):
    """Fetch one record from an OAI endpoint and return it as Exhibit-style JSON.

    e.g.:

    curl "http://localhost:8880/dpla-get-record?endpoint=URL&id=IDENTIFIER"

    Arguments:
        endpoint -- URL of the remote OAI service, passed to oaiservice()
        id       -- identifier of the record, passed to get_record(id=...)

    Returns:
        A JSON string with two keys: 'items' (the converted records) and
        'data_profile' (PROFILE restricted to the properties that actually
        occur in the items).
    """
    remote = oaiservice(endpoint, logger)
    get_record_result = remote.get_record(id=id)

    # NOTE(review): the loop below unpacks 'record' as an iterable of
    # (identifier, {field: [values]}) pairs -- confirm against oaiservice.
    record = get_record_result['record']

    exhibit_record = []
    properties_used = set()  # track the properties in use
    for rid, rinfo in record:
        erecord = {u'id': rid}
        for k, v in rinfo.iteritems():
            # Collapse single-valued fields to a scalar; keep the list otherwise.
            erecord[k] = v[0] if len(v) == 1 else v
        if u'title' in erecord:
            # The title doubles as the item's label.
            erecord[u'label'] = erecord[u'title']

        properties_used.update(erecord.keys())
        exhibit_record.append(erecord)

    # Build a per-request profile instead of slice-assigning into the shared
    # module-level PROFILE: mutating it in place would permanently drop every
    # property unused by this request, so later requests in the same process
    # could never report those properties again.
    data_profile = dict(
        PROFILE,
        properties=strip_unused_profile_properties(PROFILE["properties"],
                                                   properties_used))

    #FIXME: This profile is NOT correct. Dumb copy from CDM endpoint. Please fix up below
    return json.dumps({'items': exhibit_record, 'data_profile': data_profile}, indent=4)
+
def strip_unused_profile_properties(prof_props, used):
    """Rebuild the data profile by removing optional, unused properties.

    Arguments:
        prof_props -- iterable of profile entries; each entry is indexed
                      with the key "property"
        used       -- container of property names that are in use

    Returns:
        A new list holding, in their original order, the entries whose
        "property" value is in `used`. The input is not modified.
    """
    return [p for p in prof_props if p["property"] in used]
+
#FIXME: This profile is NOT correct. Dumb copy from CDM endpoint.
# Data profile returned alongside the items. Each entry in "properties"
# names a property, says whether it is enabled, gives a display label and
# carries optional "types" plus a list of "tags".
PROFILE = {
    #"original_MIME_type": "application/vnd.ms-excel",
    #"Akara_MIME_type_magic_guess": "application/vnd.ms-excel",
    #"url": "/data/uche/amculturetest/data.json",
    #"label": "amculturetest",
    "properties": [
        {
            "property": "handle",
            "enabled": True,
            "label": "Handle",
            "tags": [
                "property:type=text", "property:type=shredded_list"
            ]
        },
        {
            "property": "language",
            "enabled": True,
            "label": "Language",
            "types": [
                "text"
            ],
            "tags": [
            ]
        },
        {
            "property": "creator",
            "enabled": True,
            "label": "Creators",
            "tags": [
                "property:type=text", "property:type=shredded_list"
            ]
        },
        {
            "property": "format",
            "enabled": True,
            "label": "Formats",
            "tags": [
                "property:type=text", "property:type=shredded_list"
            ]
        },
        {
            "property": "relation",
            # Lowercase key, matching every other entry in this list.
            "enabled": True,
            "label": "Relations",
            "tags": [
                "property:type=text", "property:type=shredded_list"
            ]
        },
        {
            "property": "id",
            "enabled": False,
            "label": "id",
            "types": [
                "text"
            ],
            "tags": [
                "property:type=url"
            ]
        },
        {
            "property": "date",
            "enabled": True,
            "label": "Date",
            "tags": [
                "property:type=date", "property:type=shredded_list"
            ]
        },
        {
            "property": "datestamp",
            "enabled": True,
            "label": "Date stamp",
            "tags": ["property:type=date"]
        },
        {
            "property": "title",
            "enabled": True,
            "label": "Title",
            "types": [
                "text"
            ],
            "tags": []
        },
        {
            "property": "description",
            "enabled": True,
            "label": "Description",
            "types": [
                "text"
            ],
            "tags": []
        },
        {
            "property": "subject",
            "enabled": True,
            "label": "Subject",
            "tags": [
                "property:type=text", "property:type=shredded_list"
            ]
        },
        {
            "property": "contributor",
            "enabled": True,
            "label": "Contributor",
            "tags": [
                "property:type=text", "property:type=shredded_list"
            ]
        },
        {
            "property": "publisher",
            "enabled": True,
            "label": "Publisher",
            "types": [
                "text"
            ],
            "tags": []
        },
        {
            "property": "instructionalmethod",
            "enabled": True,
            "label": "Instructional Method",
            "types": [
                "text"
            ],
            "tags": []
        },
        {
            "property": "accrualmethod",
            "enabled": True,
            "label": "Accrual Method",
            "types": [
                "text"
            ],
            "tags": []
        },
        {
            "property": "source",
            "enabled": True,
            "label": "Source",
            "types": [
                "text"
            ],
            "tags": []
        },
        {
            "property": "provenance",
            "enabled": True,
            "label": "Provenance",
            "tags": [
                "property:type=text", "property:type=shredded_list"
            ]
        },
        {
            "property": "rights",
            "enabled": True,
            "label": "Rights",
            "tags": [
                "property:type=text", "property:type=shredded_list"
            ]
        },
        {
            "property": "rightsholder",
            "enabled": True,
            "label": "Rights Holder",
            "types": [
                "text"
            ],
            "tags": []
        },
        {
            "property": "coverage",
            "enabled": True,
            "label": "Coverage",
            "tags": [
                "property:type=text", "property:type=shredded_list"
            ]
        },
        {
            "property": "audience",
            "enabled": True,
            "label": "Audience",
            "tags": [
                "property:type=text", "property:type=shredded_list"
            ]
        },
        {
            "property": "label",
            "enabled": True,
            "label": "Label",
            "types": [
                "text"
            ],
            "tags": []
        },
        {
            "property": "type",
            "enabled": True,
            "label": "Document Type",
            "types": [
                "text"
            ],
            "tags": []
        },
    ],
    #"Akara_MIME_type_guess": "application/vnd.ms-excel"
}
Oops, something went wrong.

0 comments on commit 5191660

Please sign in to comment.