Permalink
Browse files

MWDL fixes

- Added a mapping for the "identifier" field.
- Renamed mwdl_enrich_location to mwdl_enrich_state_located_in and
  updated the MWDL profile to run enrich_location against the
  stateLocatedIn field.
  • Loading branch information...
1 parent 62abe8d commit 790108e0ae33b5f0a78d2fd97d4b58d8c102994c Miguel Alatorre committed Mar 15, 2013
View
@@ -131,7 +131,7 @@ MODULES = [
"dplaingestion.akamod.arc-to-dpla",
"dplaingestion.akamod.dpla-get-record",
"dplaingestion.akamod.primo-to-dpla",
- "dplaingestion.akamod.mwdl_enrich_location"
+ "dplaingestion.akamod.mwdl_enrich_state_located_in"
]
### Section 3: Other module configuration goes here
@@ -4,13 +4,13 @@
from amara.thirdparty import json
from dplaingestion.selector import getprop, setprop, exists
-@simple_service('POST', 'http://purl.org/la/dp/mwdl_enrich_location',
- 'mwdl_enrich_location', 'application/json')
-def mwdlenrichlocation(body, ctype, action="mdl_enrich_location",
- prop="aggregatedCHO/spatial"):
+@simple_service('POST', 'http://purl.org/la/dp/mwdl_enrich_state_located_in',
+ 'mwdl_enrich_state_located_in', 'application/json')
+def mwdlenrichstatelocatedin(body, ctype, action="mdl_enrich_state_located_in",
+ prop="aggregatedCHO/stateLocatedIn"):
"""
- Service that accepts a JSON document and enriches the "spatial" field of
- that document by:
+ Service that accepts a JSON document and enriches the "stateLocatedIn"
+ field of that document by:
For primary use with MWDL documents.
"""
@@ -23,14 +23,14 @@ def mwdlenrichlocation(body, ctype, action="mdl_enrich_location",
return "Unable to parse body as JSON"
if exists(data,prop):
- spatial = []
+ sli = []
values = getprop(data,prop)
for v in values.split(";"):
if STATE_CODES.get(v):
- spatial.append(STATE_CODES[v])
+ sli.append(STATE_CODES[v])
else:
- spatial.append(v)
- setprop(data, prop, "; ".join(spatial))
+ sli.append(v)
+ setprop(data, prop, "; ".join(sli))
return json.dumps(data)
@@ -44,12 +44,6 @@
}
}
-def title_transform(d, p):
- title = getprop(d, p)
- alt_field = RECORD + "display/lds10"
- alt = getprop(d, alt_field) if exists(d, alt_field) else None
- return [title, alt] if alt else title
-
def web_resource_transform(d, url):
format_field = RECORD + "display/format"
format = getprop(d, format_field) if exists(d, format_field) else None
@@ -62,6 +56,7 @@ def multi_transform(d, key, props):
p = RECORD + p
if exists(d, p):
v = getprop(d, p)
+ if not v: continue
if not isinstance(v, list):
v = [v]
[values.append(s) for s in v if s not in values]
@@ -71,7 +66,6 @@ def multi_transform(d, key, props):
# Structure mapping the original top level property to a function returning a single
# item dict representing the new property and its value
CHO_TRANSFORMER = {
- RECORD + "display/contributor" : lambda d, p: {"contributor": getprop(d, p)},
RECORD + "display/creator" : lambda d, p: {"creator": getprop(d, p)},
RECORD + "search/creationdate" : lambda d, p: {"date": getprop(d, p)},
RECORD + "search/description" : lambda d, p: {"description": getprop(d, p)},
@@ -81,8 +75,8 @@ def multi_transform(d, key, props):
RECORD + "display/rights" : lambda d, p: {"rights": getprop(d, p)},
RECORD + "display/subject" : lambda d, p: {"subject": getprop(d, p)},
RECORD + "display/lds09" : lambda d, p: {"temporal": getprop(d, p)},
- RECORD + "display/title" : lambda d, p: {"title": title_transform(d, p)},
- RECORD + "display/lds18" : lambda d, p: {"type": getprop(d, p)}
+ RECORD + "display/lds18" : lambda d, p: {"type": getprop(d, p)},
+ RECORD + "search/lsr03" : lambda d, p: {"stateLocatedIn": getprop(d, p)}
}
AGGREGATION_TRANSFORMER = {
@@ -131,10 +125,14 @@ def primotodpla(body,ctype,geoprop=None):
# Apply transformations that are dependent on more than one
# original document field
- sp_props = ["display/lds08", "search/lsr14", "search/lsr03"]
+ id_props = ["control/recordid", "display/identifier"]
+ sp_props = ["display/lds08", "search/lsr14"]
ipo_props = ["display/lds04", "search/lsr13"]
+ title_props = ["display/title", "display/lds10"]
+ out["aggregatedCHO"].update(multi_transform(data, "identifier", id_props))
out["aggregatedCHO"].update(multi_transform(data, "spatial", sp_props))
out["aggregatedCHO"].update(multi_transform(data, "isPartOf", ipo_props))
+ out["aggregatedCHO"].update(multi_transform(data, "title", title_props))
dp_props = ["display/lds03", "search/lsr12"]
out.update(multi_transform(data, "dataProvider", dp_props))
View
@@ -3,23 +3,25 @@
"enrichments_coll": [],
"name": "mwdl",
"enrichments_rec": [
- "http://localhost:8881/select-id?prop=_id",
- "http://localhost:8881/primo-to-dpla",
- "http://localhost:8881/shred?prop=aggregatedCHO%2Fcontributor%2CaggregatedCHO%2Fcreator%2CaggregatedCHO%2Fdate",
- "http://localhost:8881/shred?prop=aggregatedCHO%2Flanguage%2CaggregatedCHO%2Fpublisher%2CaggregatedCHO%2Frelation",
- "http://localhost:8881/shred?prop=aggregatedCHO%2Fsubject%2CaggregatedCHO%2Ftype",
- "http://localhost:8881/shred?prop=isShownAt%2Fformat",
- "http://localhost:8881/mwdl_enrich_location",
- "http://localhost:8881/move_date_values?prop=aggregatedCHO%2Fsubject",
- "http://localhost:8881/move_date_values?prop=aggregatedCHO%2Fspatial",
- "http://localhost:8881/enrich-date",
- "http://localhost:8881/enrich-subject",
- "http://localhost:8881/cleanup_value",
- "http://localhost:8881/enrich-type",
- "http://localhost:8881/enrich-format",
- "http://localhost:8881/enrich_location",
- "http://localhost:8881/enrich_language",
- "http://localhost:8881/sets_prop?prop=aggregatedCHO%2FphysicalMedium"
+ "/select-id?prop=_id",
+ "/primo-to-dpla",
+ "/shred?prop=aggregatedCHO%2Fcontributor%2CaggregatedCHO%2Fcreator%2CaggregatedCHO%2Fdate",
+ "/shred?prop=aggregatedCHO%2Flanguage%2CaggregatedCHO%2Fpublisher%2CaggregatedCHO%2Frelation",
+ "/shred?prop=aggregatedCHO%2Fsubject%2CaggregatedCHO%2Ftype",
+ "/shred?prop=isShownAt%2Fformat",
+ "/mwdl_enrich_state_located_in",
+ "/move_date_values?prop=aggregatedCHO%2Fsubject",
+ "/move_date_values?prop=aggregatedCHO%2Fspatial",
+ "/enrich_earliest_date",
+ "/enrich_date",
+ "/enrich-subject",
+ "/cleanup_value",
+ "/enrich-type",
+ "/enrich-format",
+ "/enrich_location",
+ "/enrich_location?prop=aggregatedCHO%2FstateLocatedIn",
+ "/enrich_language",
+ "/sets_prop?prop=aggregatedCHO%2FphysicalMedium"
],
"last_checked": "2013-03-05T17:30:21.689809",
"contributor": {

0 comments on commit 790108e

Please sign in to comment.