Skip to content
This repository
Browse code

First working draft of edan ingestion.

  • Loading branch information...
commit 52163077d6ce01ad5980bd8929e6b6dc96c1bad8 1 parent 445f313
authored March 13, 2013
128  lib/akamod/edan_to_dpla.py
@@ -41,6 +41,28 @@
41 41
    }
42 42
 }
43 43
 
  44
+
  45
def transform_description(d):
    """Return the first "Notes"-labelled free-text note as the description.

    Reads the "freetext"/"notes" entries through arc_group_extraction and
    picks the first entry whose "@label" is "Notes" and that carries a
    "#text" value.

    Returns {"description": text} when that text is non-empty, otherwise {}.
    """
    items = arc_group_extraction(d, "freetext", "notes")
    if not isinstance(items, list):
        # A lone result is handled as a one-element list.
        items = [items]

    for item in items:
        # Other transforms in this file guard against a [None] result, so
        # skip anything that is not a dict instead of raising on it.
        if not isinstance(item, dict):
            continue
        if item.get("@label") == "Notes" and "#text" in item:
            text = item["#text"]
            # Only the first matching note is considered, even if it is empty.
            return {"description": text} if text else {}

    return {}
  54
+
  55
+
  56
def extract_date(d, group_key, item_key):
    """Join every "#text" value found under group_key/item_key into a date.

    d         -- source record passed straight to arc_group_extraction
    group_key -- group key handed to arc_group_extraction
    item_key  -- item key handed to arc_group_extraction

    Returns {"date": "text1; text2; ..."} when at least one entry carries a
    "#text" value, otherwise {}.
    """
    items = arc_group_extraction(d, group_key, item_key)
    if not isinstance(items, list):
        # A lone result is handled as a one-element list.
        items = [items]

    # Entries that are not dicts (e.g. a None placeholder) are skipped
    # rather than raising a TypeError on the membership test.
    dates = [item["#text"] for item in items
             if isinstance(item, dict) and "#text" in item]

    return {"date": "; ".join(dates)} if dates else {}
  64
+
  65
+
44 66
 def date_transform(d, groupKey, itemKey):
45 67
     date = None
46 68
     if isinstance(itemKey, list):
@@ -95,6 +117,75 @@ def creator_transform(d):
95 117
     return {"creator": creator} if creator else {}
96 118
 
97 119
 
  120
def transform_format(d):
    """Collect physical-description and medium statements as the format.

    Reads the "freetext"/"physicalDescription" entries through
    arc_group_extraction and keeps the "#text" of every entry whose
    "@label" is "Physical description" or "Medium".

    Returns {"format": [text, ...]}, or {} when nothing matches.
    """
    labels = ("Physical description", "Medium")
    formats = arc_group_extraction(d, "freetext", "physicalDescription")
    if not isinstance(formats, list):
        # A lone result is handled as a one-element list.
        formats = [formats]

    # Skip non-dict entries (other transforms in this file guard against a
    # [None] result) and entries lacking a label or text, instead of raising.
    f = [e["#text"] for e in formats
         if isinstance(e, dict)
         and e.get("@label") in labels
         and "#text" in e]

    return {"format": f} if f else {}
  127
+
  128
+
  129
def transform_rights(d):
    """Build the rights statements from credit-line and object-rights notes.

    Looks at two "freetext" item groups, in this order:
      * "creditLine"   -- entries labelled "Credit line"
      * "objectRights" -- entries labelled "Rights"
    and gathers the "#text" of each matching entry.

    Returns {"rights": [text, ...]}, or {} when nothing matches.
    """
    # (item key, required "@label") pairs; order defines output order.
    sources = (
        ("creditLine", "Credit line"),
        ("objectRights", "Rights"),
    )

    p = []
    for item_key, label in sources:
        ps = arc_group_extraction(d, "freetext", item_key)
        if not isinstance(ps, list):
            # A lone result is handled as a one-element list.
            ps = [ps]
        # Non-dict entries (such as the None placeholder the original
        # guarded against) and entries without text are ignored.
        p.extend(e["#text"] for e in ps
                 if isinstance(e, dict)
                 and e.get("@label") == label
                 and "#text" in e)

    return {"rights": p} if p else {}
  140
+
  141
+
  142
def transform_publisher(d):
    """Collect the text of every "freetext"/"publisher" entry.

    Returns {"publisher": [text, ...]}, or {} when no entry carries a
    "#text" value.
    """
    ps = arc_group_extraction(d, "freetext", "publisher")
    if not isinstance(ps, list):
        # A lone result is handled as a one-element list.
        ps = [ps]

    # A plain truthiness check would let a [None] result through (other
    # transforms in this file guard against exactly that), so filter per
    # entry and keep only dicts that actually have text.
    p = [e["#text"] for e in ps if isinstance(e, dict) and "#text" in e]

    return {"publisher": p} if p else {}
  149
+
  150
+
  151
def transform_place(d):
    """Collect place names from the "freetext"/"place" entries.

    Keeps the "#text" of every entry whose "@label" is "Place", "Country"
    or "Site".

    Returns {"place": [text, ...]}, or {} when nothing matches.
    """
    labels = ("Place", "Country", "Site")
    places = arc_group_extraction(d, "freetext", "place")
    if not isinstance(places, list):
        # A lone result is handled as a one-element list.
        places = [places]

    # Skip non-dict entries (other transforms in this file guard against a
    # [None] result) and entries lacking a label or text, instead of raising.
    place = [e["#text"] for e in places
             if isinstance(e, dict)
             and e.get("@label") in labels
             and "#text" in e]

    return {"place": place} if place else {}
  158
+
  159
def transform_title(d):
    """Collect titles from the "title" group.

    Keeps the "#text" of every extracted entry whose "@label" is "Title"
    or "Object Name".

    Returns {"title": [text, ...]}, or {} when nothing matches.
    """
    labels = ("Title", "Object Name")
    # NOTE(review): the arc_group_extraction signature shown in this file
    # is (d, groupKey, itemKey, nameKey=None), so this two-argument call
    # looks like it would raise TypeError. The intended item key is not
    # visible here -- confirm and supply it.
    ps = arc_group_extraction(d, "title")
    if not isinstance(ps, list):
        # A lone result is handled as a one-element list.
        ps = [ps]

    # Non-dict entries (such as the None placeholder the original guarded
    # against) and entries lacking a label or text are ignored.
    p = [e["#text"] for e in ps
         if isinstance(e, dict)
         and e.get("@label") in labels
         and "#text" in e]

    return {"title": p} if p else {}
  167
+
  168
def transform_subject(d):
    """Build the subject list from topic and culture free-text entries.

    Looks at two "freetext" item groups, in this order:
      * "topic"   -- entries labelled "Topic"
      * "culture" -- entries labelled "Nationality"
    and gathers the "#text" of each matching entry.

    Returns {"subject": [text, ...]}, or {} when nothing matches.
    """
    # (item key, required "@label") pairs; order defines output order.
    sources = (
        ("topic", "Topic"),
        ("culture", "Nationality"),
    )

    p = []
    for item_key, label in sources:
        ps = arc_group_extraction(d, "freetext", item_key)
        if not isinstance(ps, list):
            # A lone result is handled as a one-element list.
            ps = [ps]
        # Non-dict entries (such as the None placeholder the original
        # guarded against) and entries lacking a label or text are ignored.
        p.extend(e["#text"] for e in ps
                 if isinstance(e, dict)
                 and e.get("@label") == label
                 and "#text" in e)

    return {"subject": p} if p else {}
  179
+
  180
+
  181
def transform_identifier(d):
    """Collect catalog and accession identifiers.

    Reads the "freetext"/"identifier" entries through arc_group_extraction
    and keeps every entry whose "@label" starts with "Catalog" or
    "Accession".

    Returns {"identifier": [entry, ...]}, or {} when nothing matches.
    The result was previously emitted under the key "extent", which does
    not match this function's purpose and reads as a copy-paste slip.
    """
    prefixes = ("Catalog", "Accession")
    entries = arc_group_extraction(d, "freetext", "identifier")
    if not isinstance(entries, list):
        # A lone result is handled as a one-element list.
        entries = [entries]

    # NOTE(review): whole entries are kept, as before; the sibling
    # transforms emit only e["#text"] -- confirm which form is wanted.
    # Non-dict entries and entries without a label are skipped, not raised on.
    identifier = [e for e in entries
                  if isinstance(e, dict)
                  and (e.get("@label") or "").startswith(prefixes)]

    return {"identifier": identifier} if identifier else {}
  187
+
  188
+
98 189
 def extent_transform(d):
99 190
     extent = []
100 191
     extents = arc_group_extraction(d, "freetext", "physicalDescription")
@@ -134,17 +225,6 @@ def subject_and_spatial_transform(d):
134 225
     
135 226
     return v
136 227
 
137  
-def rights_transform(d):
138  
-    rights = []
139  
-
140  
-    r = arc_group_extraction(d, "access-restriction", "restriction-status")[0]
141  
-    if r:
142  
-        rights.append("Restrictions: %s" % r)
143  
-    r = arc_group_extraction(d, "use-restriction", "use-status")[0]
144  
-    if r:
145  
-        rights.append("Use status: %s" % r)
146  
-
147  
-    return {"rights": "; ".join(filter(None,rights))} if rights else {}
148 228
 
149 229
 def type_transform(d):
150 230
     type = []
@@ -237,9 +317,18 @@ def arc_group_extraction(d, groupKey, itemKey, nameKey=None):
237 317
 # Structure mapping the original top level property to a function returning a single
238 318
 # item dict representing the new property and its value
239 319
 CHO_TRANSFORMER = {
240  
-    "physical-occurrences"  : extent_transform,
  320
+    "freetext/physicalDescription" : extent_transform,
241 321
     "freetext/name"         : creator_transform,
242  
-    "freetext/setName"       : is_part_of_transform,
  322
+    "freetext/setName"      : is_part_of_transform,
  323
+    "freetext/date"         : lambda d: extract_date(d,"freetext","date"),
  324
+    "freetext/notes"        : transform_description,
  325
+    "freetext/identifier"   : transform_identifier,
  326
+    "language"              : lambda d: {"language": d.get("language") },
  327
+    "freetext/physicalDescription" : transform_format,
  328
+    "freetext/place"        : transform_place,
  329
+    "freetext/publisher"    : transform_publisher,
  330
+    "title"                 : transform_title,
  331
+
243 332
 #    "release-dates"         : lambda d: date_transform(d,"release-dates","release-date"),
244 333
 #    "broadcast-dates"       : lambda d: date_transform(d,"broadcast-dates","broadcast-date"),
245 334
 #    "production-dates"      : lambda d: date_transform(d,"production-dates","production-date"),
@@ -288,24 +377,15 @@ def edantodpla(body,ctype,geoprop=None):
288 377
     for k, v in CHO_TRANSFORMER.items():
289 378
         if exists(data, k):
290 379
             out["aggregatedCHO"].update(v(data))
291  
-    #for p in data.keys():
292  
-    #    if p in CHO_TRANSFORMER:
293  
-    #        out["aggregatedCHO"].update(CHO_TRANSFORMER[p](data))
294  
-
295 380
     for k, v in AGGREGATION_TRANSFORMER.items():
296  
-        logger.debug(k)
297  
-        logger.debug(v)
298 381
         if exists(data, k):
299  
-            logger.debug("FOUND")
300 382
             out.update(v(data))
301 383
 
302  
-    #    if p in AGGREGATION_TRANSFORMER:
303  
-    #        out.update(AGGREGATION_TRANSFORMER[p](data))
304  
-
305 384
     # Apply transformations that are dependent on more than one
306 385
     # original document  field
307 386
     #out["aggregatedCHO"].update(type_transform(data))
308  
-    #out["aggregatedCHO"].update(rights_transform(data))
  387
+    out["aggregatedCHO"].update(transform_rights(data))
  388
+    out["aggregatedCHO"].update(transform_subject(data))
309 389
     #out["aggregatedCHO"].update(subject_and_spatial_transform(data))
310 390
     #out.update(has_view_transform(data))
311 391
 
2  profiles/smithsonian.pjs
@@ -12,5 +12,5 @@
12 12
         "name": "Smithsonian"
13 13
     },
14 14
     "type": "edan",
15  
-    "endpoint_URL": "file:/home/szymon/smithsonian_demo/"
  15
+    "endpoint_URL": "file:/home/szymon/smithsonian/"
16 16
 }

0 notes on commit 5216307

Please sign in to comment.
Something went wrong with that request. Please try again.