Permalink
Browse files

Merge branch 'develop' into smithsonian_ingestion

  • Loading branch information...
Szymon Guz
Szymon Guz committed Mar 15, 2013
2 parents f15842f + a85ca81 commit 51916603ea0390807a4726488f488f81a612036e
View
@@ -129,7 +129,10 @@ MODULES = [
"dplaingestion.akamod.sets_prop",
"dplaingestion.akamod.enrich_language",
"dplaingestion.akamod.arc-to-dpla",
"dplaingestion.akamod.edan_to_dpla"
"dplaingestion.akamod.edan_to_dpla",
"dplaingestion.akamod.dpla-get-record",
"dplaingestion.akamod.primo-to-dpla",
"dplaingestion.akamod.mwdl_enrich_state_located_in"
]
### Section 3: Other module configuration goes here
@@ -180,3 +183,6 @@ class kentucky_identify_object(identify_object):
# Empty subclass: inherits everything from identify_object and adds nothing.
class artstor_identify_object(identify_object):
pass
# Empty subclass: inherits everything from identify_object and adds nothing.
class georgia_identify_object(identify_object):
pass
@@ -19,15 +19,15 @@ def convert(data, prop):
if exists(data, prop):
v = getprop(data, prop)
if isinstance(v, basestring):
setprop(data, prop, cleanup(v))
setprop(data, prop, cleanup(v, prop))
elif isinstance(v, list):
temp = []
for val in v:
temp.append(cleanup(val))
temp.append(cleanup(val, prop))
setprop(data, prop, temp)
def cleanup(value):
def cleanup(value, prop):
""" Performs a cleanup of value using a bunch of regexps.
Arguments:
@@ -36,7 +36,10 @@ def cleanup(value):
Returns:
Converted string.
"""
TAGS_FOR_STRIPPING = '[\.\' \r\t\n";,]*' # Tags for stripping at beginning and at the end.
# Do not remove double quotes from title
dquote = '' if prop == "aggregatedCHO/title" else '"'
# Tags for stripping at beginning and at the end.
TAGS_FOR_STRIPPING = '[\.\' \r\t\n;,%s]*' % dquote
REGEXPS = (' *-- *', '--'), \
('[\t ]{2,}', ' '), \
('^' + TAGS_FOR_STRIPPING, ''), \
View
@@ -7,7 +7,7 @@
@simple_service('POST', 'http://purl.org/la/dp/copy_prop', 'copy_prop',
'application/json')
def copyprop(body,ctype,prop=None,to_prop=None,create=False,key=None,
remove=None):
remove=None,no_replace=None):
"""Copies value in one prop to another prop.
Keyword arguments:
@@ -18,6 +18,7 @@ def copyprop(body,ctype,prop=None,to_prop=None,create=False,key=None,
create -- creates to_prop if True (default False)
key -- the key to use if to_prop is a dict (default None)
remove -- removes prop if True (default False)
no_replace -- creates list of to_prop string and appends prop if True
"""
@@ -29,20 +30,27 @@ def copyprop(body,ctype,prop=None,to_prop=None,create=False,key=None,
return "Unable to parse body as JSON"
if exists(data, prop) and create and not exists(data, to_prop):
setprop(data, to_prop, "")
val = {} if key else ""
setprop(data, to_prop, val)
if exists(data, prop) and exists(data, to_prop):
val = getprop(data, prop)
to_element = getprop(data, to_prop)
if isinstance(to_element, basestring):
if no_replace:
el = [to_element] if to_element else []
el.append(val)
# Flatten
val = [e for s in el for e in (s if not isinstance(s, basestring) else [s])]
setprop(data, to_prop, val)
else:
# If key is set, assume to_element is dict or list of dicts
if key:
if not isinstance(to_element, list):
to_element = [to_element]
for dict in to_element:
if exists(dict, key):
if exists(dict, key) or create:
setprop(dict, key, val)
else:
msg = "Key %s does not exist in %s" % (key, to_prop)
@@ -0,0 +1,286 @@
# -*- encoding: utf-8 -*-
'''
@ 2011 by Uche Ogbuji <uche@ogbuji.net>
This file is part of the open source Akara project,
provided under the Apache 2.0 license.
See the files LICENSE and NOTICE for details.
Project home, documentation, distributions: http://wiki.xml3k.org/Akara
Module name:: freemix_akara.oai
Scrapes collections from an OAI site into JSON form for Freemix
= Defined REST entry points =
http://purl.org/com/zepheira/freemix/services/oai.json (freemix_akara.oai) Handles GET
= Configuration =
None
= Notes on security =
This makes heavy access to remote OAI sites
= Notes =
Adapted 2012 by Jeffrey Licht to support resumption tokens
'''
import sys, time
from amara.thirdparty import json
from akara.services import simple_service
from akara import logger
from akara import module_config
from dplaingestion.oai import oaiservice
import sys
GETRECORD_SERVICE_ID = 'http://purl.org/la.dp/dpla-get-record'
@simple_service('GET', GETRECORD_SERVICE_ID, 'dpla-get-record', 'application/json')
def getrecord(endpoint, id):
    """Fetch one record from an OAI endpoint and return it as a JSON string.

    e.g.:
    curl "http://localhost:8880/dpla-get-record?endpoint=URL&id=IDENTIFIER"

    Arguments:
        endpoint -- URL handed to oaiservice() to reach the remote site
        id       -- identifier passed to the remote get_record() call
                    (shadows the builtin, but the name is the public query
                    parameter and therefore must stay as it is)

    Returns:
        JSON text with two keys: 'items' (list of flattened records) and
        'data_profile' (PROFILE restricted to the properties that appear
        in the returned items).
    """
    remote = oaiservice(endpoint, logger)
    get_record_result = remote.get_record(id=id)
    record = get_record_result['record']

    exhibit_record = []
    properties_used = set()  # track the properties in use
    for rid, rinfo in record:
        erecord = {u'id': rid}
        for k, v in rinfo.iteritems():
            # Single-valued fields are unwrapped from their list
            erecord[k] = v[0] if len(v) == 1 else v
        if u'title' in erecord:
            erecord[u'label'] = erecord[u'title']
        properties_used.update(erecord.keys())
        exhibit_record.append(erecord)

    # Build a per-response profile. The module-level PROFILE must not be
    # modified in place: doing so would permanently drop properties for
    # every later request served by this process.
    #FIXME: This profile is NOT correct. Dumb copy from CDM endpoint. Please fix up below
    profile = dict(PROFILE)
    profile["properties"] = strip_unused_profile_properties(
        PROFILE["properties"], properties_used)

    return json.dumps({'items': exhibit_record, 'data_profile': profile}, indent=4)
# Rebuild the data profile by removing optional, unused properties
def strip_unused_profile_properties(prof_props, used):
    """Return a new list holding only the profile entries that are in use.

    Arguments:
        prof_props -- iterable of dicts, each with a "property" key
        used       -- container of property names that should be kept

    Returns:
        List of the entries of prof_props whose "property" value is in
        used, in their original order.
    """
    kept = []
    for entry in prof_props:
        if entry["property"] in used:
            kept.append(entry)
    return kept
#FIXME: This profile is NOT correct. Dumb copy from CDM endpoint.
# Data profile returned with each response under 'data_profile'. Every entry
# in "properties" uses the same lowercase keys ("property", "enabled",
# "label", optional "types", "tags") so that consumers can read them
# uniformly.
PROFILE = {
    #"original_MIME_type": "application/vnd.ms-excel",
    #"Akara_MIME_type_magic_guess": "application/vnd.ms-excel",
    #"url": "/data/uche/amculturetest/data.json",
    #"label": "amculturetest",
    "properties": [
        {
            "property": "handle",
            "enabled": True,
            "label": "Handle",
            "tags": [
                "property:type=text", "property:type=shredded_list"
            ]
        },
        {
            "property": "language",
            "enabled": True,
            "label": "Language",
            "types": [
                "text"
            ],
            "tags": [
            ]
        },
        {
            "property": "creator",
            "enabled": True,
            "label": "Creators",
            "tags": [
                "property:type=text", "property:type=shredded_list"
            ]
        },
        {
            "property": "format",
            "enabled": True,
            "label": "Formats",
            "tags": [
                "property:type=text", "property:type=shredded_list"
            ]
        },
        {
            "property": "relation",
            # Was spelled "Enabled"; normalised to match every other entry
            "enabled": True,
            "label": "Relations",
            "tags": [
                "property:type=text", "property:type=shredded_list"
            ]
        },
        {
            "property": "id",
            "enabled": False,
            "label": "id",
            "types": [
                "text"
            ],
            "tags": [
                "property:type=url"
            ]
        },
        {
            "property": "date",
            "enabled": True,
            "label": "Date",
            "tags": [
                "property:type=date", "property:type=shredded_list"
            ]
        },
        {
            "property": "datestamp",
            "enabled": True,
            "label": "Date stamp",
            "tags": ["property:type=date"]
        },
        {
            "property": "title",
            "enabled": True,
            "label": "Title",
            "types": [
                "text"
            ],
            "tags": []
        },
        {
            "property": "description",
            "enabled": True,
            "label": "Description",
            "types": [
                "text"
            ],
            "tags": []
        },
        {
            "property": "subject",
            "enabled": True,
            "label": "Subject",
            "tags": [
                "property:type=text", "property:type=shredded_list"
            ]
        },
        {
            "property": "contributor",
            "enabled": True,
            "label": "Contributor",
            "tags": [
                "property:type=text", "property:type=shredded_list"
            ]
        },
        {
            "property": "publisher",
            "enabled": True,
            "label": "Publisher",
            "types": [
                "text"
            ],
            "tags": []
        },
        {
            "property": "instructionalmethod",
            "enabled": True,
            "label": "Instructional Method",
            "types": [
                "text"
            ],
            "tags": []
        },
        {
            "property": "accrualmethod",
            "enabled": True,
            "label": "Accrual Method",
            "types": [
                "text"
            ],
            "tags": []
        },
        {
            "property": "source",
            "enabled": True,
            "label": "Source",
            "types": [
                "text"
            ],
            "tags": []
        },
        {
            "property": "provenance",
            "enabled": True,
            "label": "Provenance",
            "tags": [
                "property:type=text", "property:type=shredded_list"
            ]
        },
        {
            "property": "rights",
            "enabled": True,
            "label": "Rights",
            "tags": [
                "property:type=text", "property:type=shredded_list"
            ]
        },
        {
            "property": "rightsholder",
            "enabled": True,
            "label": "Rights Holder",
            "types": [
                "text"
            ],
            "tags": []
        },
        {
            "property": "coverage",
            "enabled": True,
            "label": "Coverage",
            "tags": [
                "property:type=text", "property:type=shredded_list"
            ]
        },
        {
            "property": "audience",
            "enabled": True,
            "label": "Audience",
            "tags": [
                "property:type=text", "property:type=shredded_list"
            ]
        },
        {
            "property": "label",
            "enabled": True,
            "label": "Label",
            "types": [
                "text"
            ],
            "tags": []
        },
        {
            "property": "type",
            "enabled": True,
            "label": "Document Type",
            "types": [
                "text"
            ],
            "tags": []
        },
    ],
    #"Akara_MIME_type_guess": "application/vnd.ms-excel"
}
Oops, something went wrong.

0 comments on commit 5191660

Please sign in to comment.