Skip to content

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
...
  • 5 commits
  • 3 files changed
  • 0 commit comments
  • 1 contributor
Showing with 133 additions and 44 deletions.
  1. +2 −1 akara.conf
  2. +130 −42 lib/akamod/{edan-to-dpla.py → edan_to_dpla.py}
  3. +1 −1 profiles/smithsonian.pjs
View
3 akara.conf
@@ -128,7 +128,8 @@ MODULES = [
"dplaingestion.akamod.cleanup_value",
"dplaingestion.akamod.sets_prop",
"dplaingestion.akamod.enrich_language",
- "dplaingestion.akamod.arc-to-dpla"
+ "dplaingestion.akamod.arc-to-dpla",
+ "dplaingestion.akamod.edan_to_dpla"
]
### Section 3: Other module configuration goes here
View
172 lib/akamod/edan-to-dpla.py → lib/akamod/edan_to_dpla.py
@@ -41,6 +41,28 @@
}
}
+
+def transform_description(d):
+ description = None
+ items = arc_group_extraction(d, "freetext", "notes")
+ for item in (items if isinstance(items, list) else [items]):
+ if "@label" in item and item["@label"] == "Notes":
+ if "#text" in item:
+ description = item["#text"]
+ break;
+ return {"description": description} if description else {}
+
+
+def extract_date(d, group_key, item_key):
+ dates = []
+ items = arc_group_extraction(d, group_key, item_key)
+ for item in (items if isinstance(items, list) else [items]):
+ if "#text" in item:
+ dates.append(item["#text"])
+
+ return {"date": "; ".join(dates)} if dates else {}
+
+
def date_transform(d, groupKey, itemKey):
date = None
if isinstance(itemKey, list):
@@ -52,17 +74,17 @@ def date_transform(d, groupKey, itemKey):
return {"date": date} if date else {}
+
def is_part_of_transform(d):
is_part_of = []
- lods = ["series", "file unit"]
- items = arc_group_extraction(d, "hierarchy", "hierarchy-item")
+ items = arc_group_extraction(d, "freetext", "setName")
for item in (items if isinstance(items, list) else [items]):
- if item["hierarchy-item-lod"].lower() in lods:
- is_part_of.append("%s: %s" % (item["hierarchy-item-lod"],
- item["hierarchy-item-title"]))
+ if "#text" in item:
+ is_part_of.append(item["#text"])
return {"isPartOf": "; ".join(is_part_of)} if is_part_of else {}
+
def source_transform(d):
source = None
for s in d["handle"]:
@@ -79,12 +101,12 @@ def is_shown_at_transform(d):
def collection_transform(d):
- collection = getprop(d, "collection")
- items = arc_group_extraction(d, "hierarchy", "hierarchy-item")
+ collections = []
+ items = arc_group_extraction(d, "freetext", "setName")
for item in (items if isinstance(items, list) else [items]):
- if item["hierarchy-item-id"] == collection["name"]:
- setprop(collection, "name", item["hierarchy-item-title"])
- return {"collection": collection} if collection else {}
+ if "#text" in item:
+ collections.append(item["#text"])
+ return {"collection": collections} if collections else {}
def creator_transform(d):
@@ -97,6 +119,77 @@ def creator_transform(d):
return {"creator": creator} if creator else {}
+def transform_format(d):
+ f = []
+ labels = ["Physical description", "Medium"]
+ formats = arc_group_extraction(d, "freetext", "physicalDescription")
+ [f.append(e["#text"]) for e in formats if e["@label"] in labels]
+
+ return {"format": f} if f else {}
+
+
+def transform_rights(d):
+ p = []
+ ps = arc_group_extraction(d, "freetext", "creditLine")
+ if ps != [None]:
+ [p.append(e["#text"]) for e in ps if "@label" in e and e["@label"] == "Credit line"]
+
+ ps = arc_group_extraction(d, "freetext", "objectRights")
+ if ps != [None]:
+ [p.append(e["#text"]) for e in ps if "@label" in e and e["@label"] == "Rights"]
+
+ return {"rights": p} if p else {}
+
+
+def transform_publisher(d):
+ p = []
+ ps = arc_group_extraction(d, "freetext", "publisher")
+ if ps:
+ [p.append(e["#text"]) for e in ps]
+
+ return {"publisher": p} if p else {}
+
+
+def transform_place(d):
+ place = []
+ labels = ["Place", "Country", "Site"]
+ places = arc_group_extraction(d, "freetext", "place")
+ [place.append(e["#text"]) for e in places if e["@label"] in labels]
+
+ return {"place": place} if place else {}
+
+
+def transform_title(d):
+ p = []
+ labels = ["Title", "Object Name"]
+ ps = arc_group_extraction(d, "title")
+ if ps != [None]:
+ [p.append(e["#text"]) for e in ps if e["@label"] in labels]
+
+ return {"title": p} if p else {}
+
+
+def transform_subject(d):
+ p = []
+ ps = arc_group_extraction(d, "freetext", "topic")
+ if ps != [None]:
+ [p.append(e["#text"]) for e in ps if e["@label"] == "Topic"]
+
+ ps = arc_group_extraction(d, "freetext", "culture")
+ if ps != [None]:
+ [p.append(e["#text"]) for e in ps if e["@label"] == "Nationality"]
+
+ return {"subject": p} if p else {}
+
+
+def transform_identifier(d):
+ extent = []
+ extents = arc_group_extraction(d, "freetext", "identifier")
+ [extent.append(e) for e in extents if e["@label"].startswith("Catalog") or e["@label"].startswith("Accession")]
+
+ return {"extent": extent} if extent else {}
+
+
def extent_transform(d):
extent = []
extents = arc_group_extraction(d, "freetext", "physicalDescription")
@@ -136,17 +229,6 @@ def subject_and_spatial_transform(d):
return v
-def rights_transform(d):
- rights = []
-
- r = arc_group_extraction(d, "access-restriction", "restriction-status")[0]
- if r:
- rights.append("Restrictions: %s" % r)
- r = arc_group_extraction(d, "use-restriction", "use-status")[0]
- if r:
- rights.append("Use status: %s" % r)
-
- return {"rights": "; ".join(filter(None,rights))} if rights else {}
def type_transform(d):
type = []
@@ -163,6 +245,7 @@ def type_transform(d):
return {"type": "; ".join(type)} if type else {}
+
def has_view_transform(d):
has_view = []
@@ -193,6 +276,7 @@ def add_views(has_view,rge,url,format=None):
return {"hasView": has_view} if has_view else {}
+
def arc_group_extraction(d, groupKey, itemKey, nameKey=None):
"""
Generalization of what proved to be an idiom in ARC information extraction,
@@ -239,27 +323,26 @@ def arc_group_extraction(d, groupKey, itemKey, nameKey=None):
# Structure mapping the original top level property to a function returning a single
# item dict representing the new property and its value
CHO_TRANSFORMER = {
- "physical-occurrences" : extent_transform,
- "creators" : creator_transform,
-# "hierarchy" : is_part_of_transform,
-# "release-dates" : lambda d: date_transform(d,"release-dates","release-date"),
-# "broadcast-dates" : lambda d: date_transform(d,"broadcast-dates","broadcast-date"),
-# "production-dates" : lambda d: date_transform(d,"production-dates","production-date"),
-# "coverage-dates" : lambda d: date_transform(d,"coverage-dates",["cov-start-date","cov-end-date"]),
-# "copyright-dates" : lambda d: date_transform(d,"copyright-dates","copyright-date"),
-# "title" : lambda d: {"title": d.get("title-only")},
-# "scope-content-note" : lambda d: {"description": d.get("scope-content-note")},
-# "languages" : lambda d: {"language": arc_group_extraction(d,"languages","language")}
+ "freetext/physicalDescription" : extent_transform,
+ "freetext/name" : creator_transform,
+ "freetext/setName" : is_part_of_transform,
+ "freetext/date" : lambda d: extract_date(d,"freetext","date"),
+ "freetext/notes" : transform_description,
+ "freetext/identifier" : transform_identifier,
+ "language" : lambda d: {"language": d.get("language") },
+ "freetext/physicalDescription" : transform_format,
+ "freetext/place" : transform_place,
+ "freetext/publisher" : transform_publisher,
+ "title" : transform_title,
}
AGGREGATION_TRANSFORMER = {
"id" : lambda d: {"id": d.get("id"), "@id" : "http://dp.la/api/items/"+d.get("id","")},
"_id" : lambda d: {"_id": d.get("_id")},
"originalRecord" : lambda d: {"originalRecord": d.get("originalRecord",None)},
-# "ingestType" : lambda d: {"ingestType": d.get("ingestType")},
-# "ingestDate" : lambda d: {"ingestDate": d.get("ingestDate")},
-# "collection" : collection_transform,
-# "arc-id-desc" : is_shown_at_transform
+ "ingestType" : lambda d: {"ingestType": d.get("ingestType")},
+ "ingestDate" : lambda d: {"ingestDate": d.get("ingestDate")},
+ "collection" : lambda d: {"collection": d.get("collection")},
}
@simple_service("POST", "http://purl.org/la/dp/edan-to-dpla", "edan-to-dpla", "application/ld+json")
@@ -285,20 +368,24 @@ def edantodpla(body,ctype,geoprop=None):
"aggregatedCHO": {}
}
+ logger.debug("x"*60)
# Apply all transformation rules from original document
- for p in data.keys():
- if p in CHO_TRANSFORMER:
- out["aggregatedCHO"].update(CHO_TRANSFORMER[p](data))
- if p in AGGREGATION_TRANSFORMER:
- out.update(AGGREGATION_TRANSFORMER[p](data))
+ for k, v in CHO_TRANSFORMER.items():
+ if exists(data, k):
+ out["aggregatedCHO"].update(v(data))
+ for k, v in AGGREGATION_TRANSFORMER.items():
+ if exists(data, k):
+ out.update(v(data))
# Apply transformations that are dependent on more than one
# original document field
#out["aggregatedCHO"].update(type_transform(data))
- #out["aggregatedCHO"].update(rights_transform(data))
+ out["aggregatedCHO"].update(transform_rights(data))
+ out["aggregatedCHO"].update(transform_subject(data))
#out["aggregatedCHO"].update(subject_and_spatial_transform(data))
#out.update(has_view_transform(data))
+ logger.debug("x"*60)
if exists(out, "aggregatedCHO/date"):
logger.debug("OUTTYPE: %s"%getprop(out, "aggregatedCHO/date"))
@@ -315,6 +402,7 @@ def edantodpla(body,ctype,geoprop=None):
return json.dumps(out)
+
creator_field_names = [
"Architect",
"Artist",
View
2 profiles/smithsonian.pjs
@@ -12,5 +12,5 @@
"name": "Smithsonian"
},
"type": "edan",
- "endpoint_URL": "file:/home/szymon/smithsonian_demo/"
+ "endpoint_URL": "file:/home/szymon/smithsonian/"
}

No commit comments for this range

Something went wrong with that request. Please try again.