Skip to content
This repository has been archived by the owner on Dec 18, 2019. It is now read-only.

Commit

Permalink
Implemented many changes for Smithsonian ingestion data format.
Browse files Browse the repository at this point in the history
  • Loading branch information
Szymon Guz committed Mar 20, 2013
1 parent ca11c5d commit bd25c7a
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 8 deletions.
25 changes: 17 additions & 8 deletions lib/akamod/edan_to_dpla.py
Expand Up @@ -54,14 +54,13 @@ def transform_description(d):


def transform_date(d):
    """Collect the free-text "Date" entries of a record as ``temporal``.

    Looks up the ``freetext``/``date`` group via ``arc_group_extraction``
    and keeps the ``#text`` of every entry whose ``@label`` is exactly
    ``"Date"``.

    Returns ``{"temporal": [text, ...]}`` when at least one entry matched,
    otherwise an empty dict, so the result can be passed straight to
    ``dict.update``.
    """
    dates = arc_group_extraction(d, "freetext", "date")

    # All matching entries are kept (no early exit on the first hit).
    # Non-dict entries are skipped: transform_subject checks for a
    # ``[None]`` result from arc_group_extraction, and a membership test
    # on None would raise TypeError here.
    date = [
        item["#text"]
        for item in dates
        if isinstance(item, dict)
        and "@label" in item
        and "#text" in item
        and item["@label"] == "Date"
    ]

    return {"temporal": date} if date else {}

def extract_date(d, group_key, item_key):
dates = []
Expand Down Expand Up @@ -109,11 +108,19 @@ def source_transform(d):


def transform_is_shown_at(d):
    """Build the ``isShownAt`` field from the record's online media entry.

    Reads ``descriptiveNonRepeating/online_media/media/#text`` from ``d``
    with ``getprop`` and returns ``{"isShownAt": value}`` when the value is
    truthy, otherwise an empty dict.
    """
    # Only this path is effective; the earlier "record_link" assignment was
    # overwritten before use and has been dropped.
    propname = "descriptiveNonRepeating/online_media/media/#text"

    # NOTE(review): the third argument presumably makes getprop return None
    # instead of raising when the path is missing -- confirm against getprop.
    obj = getprop(d, propname, True)
    return {"isShownAt": obj} if obj else {}


def transform_object(d):
    """Build the ``object`` field from the record's media thumbnail.

    Reads ``descriptiveNonRepeating/online_media/media/@thumbnail`` from
    ``d`` with ``getprop`` and returns ``{"object": value}`` when the value
    is truthy, otherwise an empty dict.
    """
    # NOTE(review): the third argument presumably makes getprop return None
    # instead of raising when the path is missing -- confirm against getprop.
    thumbnail = getprop(
        d, "descriptiveNonRepeating/online_media/media/@thumbnail", True)
    if not thumbnail:
        return {}
    return {"object": thumbnail}


def collection_transform(d):
import re
collections = []
Expand Down Expand Up @@ -144,7 +151,7 @@ def creator_transform(d):

def transform_format(d):
f = []
labels = ["Physical description", "Medium"]
labels = ["Physical description", "Physical description", "Medium"]
formats = arc_group_extraction(d, "freetext", "physicalDescription")
[f.append(e["#text"]) for e in formats if e["@label"] in labels]

Expand Down Expand Up @@ -334,9 +341,10 @@ def transform_title(d):
def transform_subject(d):

p = []
topic_labels = ["Topic", "subject", "event"]
ps = arc_group_extraction(d, "freetext", "topic")
if ps != [None]:
[p.append(e["#text"]) for e in ps if e["@label"] == "Topic"]
[p.append(e["#text"]) for e in ps if e["@label"] in topic_labels]

ps = arc_group_extraction(d, "freetext", "culture")
if ps != [None]:
Expand Down Expand Up @@ -589,6 +597,7 @@ def edantodpla(body,ctype,geoprop=None):
out["sourceResource"].update(transform_spatial(data))

out.update(transform_is_shown_at(data))
out.update(transform_object(data))

slugify_field(out, "collection/@id")

Expand Down
1 change: 1 addition & 0 deletions profiles/smithsonian.pjs
Expand Up @@ -10,6 +10,7 @@
"/shred?prop=aggregatedCHO%2Fsubject%2CaggregatedCHO%2Ftype",
"/shred?prop=aggregatedCHO%2Fsubject&delim=%3Cbr%3E",
"/shred?prop=sourceResource%2Fformat",
"/copy_prop?prop=sourceResource%2Ftemporal&to_prop=sourceResource%2Fdate&create=True",
"/enrich_earliest_date",
"/enrich_date",
"/enrich-subject",
Expand Down

0 comments on commit bd25c7a

Please sign in to comment.