/
MwdlMapping.scala
125 lines (94 loc) · 4.55 KB
/
MwdlMapping.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
package dpla.ingestion3.mappers.providers
import dpla.ingestion3.enrichments.normalizations.StringNormalizationUtils._
import dpla.ingestion3.mappers.utils.{Document, XmlMapping, XmlExtractor}
import dpla.ingestion3.model.DplaMapData._
import dpla.ingestion3.model.{nameOnlyAgent, _}
import dpla.ingestion3.utils.Utils
import org.json4s.JValue
import org.json4s.JsonDSL._
import scala.xml._
/** XML mapping for the "mwdl" provider (Mountain West Digital Library).
  *
  * Every field is read from the record's XML by element path. The sections used are
  * `control`, `display`, `search`, `facets` and `LINKS`; `originalId` additionally
  * anchors its lookup at `PrimoNMBib/record`.
  */
class MwdlMapping extends XmlMapping with XmlExtractor {

  // Prefix that is joined with control/recordid to build the isShownAt URL.
  private val baseIsShownAt = "http://utah-primoprod.hosted.exlibrisgroup.com/primo_library/libweb/action/dlDisplay.do?vid=MWDL&afterPDS=true&docId="

  /** Applies `splitAtDelimiter(";")` to every value and flattens the results.
    *
    * Shared by the display fields that may carry several values in one element.
    */
  private def splitOnSemicolons(values: Seq[String]): Seq[String] =
    values.flatMap(_.splitAtDelimiter(";"))

  // ID minting functions

  override def useProviderName(): Boolean = true

  override def getProviderName(): String = "mwdl"

  /** PrimoNMBib/record/control/recordid */
  override def originalId(implicit data: Document[NodeSeq]): ZeroToOne[String] =
    extractString(data \\ "PrimoNMBib" \ "record" \ "control" \ "recordid")

  // SourceResource mapping

  /** search/lsr13 */
  override def collection(data: Document[NodeSeq]): Seq[DcmiTypeCollection] =
    extractStrings(data \\ "search" \ "lsr13")
      .map(nameOnlyCollection)

  /** display/contributor, split on ";" */
  override def contributor(data: Document[NodeSeq]): Seq[EdmAgent] =
    splitOnSemicolons(extractStrings(data \\ "display" \ "contributor"))
      .map(nameOnlyAgent)

  /** display/creator, split on ";" */
  override def creator(data: Document[NodeSeq]): Seq[EdmAgent] =
    splitOnSemicolons(extractStrings(data \\ "display" \ "creator"))
      .map(nameOnlyAgent)

  /** display/creationdate, split on ";".
    *
    * NOTE(review): the mapping notes also mention search/creationdate, which is not
    * read here -- confirm whether that omission is intentional.
    */
  override def date(data: Document[NodeSeq]): Seq[EdmTimeSpan] =
    splitOnSemicolons(extractStrings(data \\ "display" \ "creationdate"))
      .map(stringOnlyTimeSpan)

  /** search/description (contains dc:description, dcterms:abstract, and
    * dcterms:tableOfContents); each value is passed through `limitCharacters(1000)`.
    */
  override def description(data: Document[NodeSeq]): Seq[String] =
    extractStrings(data \\ "search" \ "description")
      .map(_.limitCharacters(1000))

  /** display/lds05 */
  override def extent(data: Document[NodeSeq]): ZeroToMany[String] =
    extractStrings(data \\ "display" \ "lds05")

  /** control/recordid */
  override def identifier(data: Document[NodeSeq]): Seq[String] =
    extractStrings(data \\ "control" \ "recordid")

  /** facets/language */
  override def language(data: Document[NodeSeq]): Seq[SkosConcept] =
    extractStrings(data \\ "facets" \ "language")
      .map(nameOnlyConcept)

  /** display/lds08, split on ";" */
  override def place(data: Document[NodeSeq]): Seq[DplaPlace] =
    splitOnSemicolons(extractStrings(data \\ "display" \ "lds08"))
      .map(nameOnlyPlace)

  /** display/relation, split on ";" */
  override def relation(data: Document[NodeSeq]): ZeroToMany[LiteralOrUri] =
    splitOnSemicolons(extractStrings(data \\ "display" \ "relation"))
      .map(eitherStringOrUri)

  /** display/rights */
  override def rights(data: Document[NodeSeq]): AtLeastOne[String] =
    (data \\ "display" \ "rights")
      .flatMap(extractStrings)

  /** display/subject, split on ";" */
  override def subject(data: Document[NodeSeq]): Seq[SkosConcept] =
    splitOnSemicolons(extractStrings(data \\ "display" \ "subject"))
      .map(nameOnlyConcept)

  /** display/lds09 */
  override def temporal(data: Document[NodeSeq]): ZeroToMany[EdmTimeSpan] =
    extractStrings(data \\ "display" \ "lds09")
      .map(stringOnlyTimeSpan)

  /** display/title */
  override def title(data: Document[NodeSeq]): Seq[String] =
    extractStrings(data \\ "display" \ "title")

  /** facets/rsrctype */
  override def `type`(data: Document[NodeSeq]): Seq[String] =
    extractStrings(data \\ "facets" \ "rsrctype")

  // OreAggregation

  override def dplaUri(data: Document[NodeSeq]): ZeroToOne[URI] = mintDplaItemUri(data)

  /** display/lds03 */
  override def dataProvider(data: Document[NodeSeq]): ZeroToMany[EdmAgent] =
    (data \\ "display" \ "lds03")
      .flatMap(extractStrings)
      .map(nameOnlyAgent)

  /** baseIsShownAt + control/recordid */
  override def isShownAt(data: Document[NodeSeq]): ZeroToMany[EdmWebResource] =
    (data \\ "control" \ "recordid")
      .flatMap(extractStrings)
      .map(recordId => s"$baseIsShownAt$recordId")
      .map(stringOnlyWebResource)

  override def originalRecord(data: Document[NodeSeq]): ExactlyOne[String] = Utils.formatXml(data)

  /** LINKS/thumbnail */
  override def preview(data: Document[NodeSeq]): ZeroToMany[EdmWebResource] =
    (data \\ "LINKS" \ "thumbnail")
      .flatMap(extractStrings)
      .map(stringOnlyWebResource)

  override def provider(data: Document[NodeSeq]): ExactlyOne[EdmAgent] = agent

  /** JSON object carrying the pre-hash provider ID and the minted DPLA ID. */
  override def sidecar(data: Document[NodeSeq]): JValue =
    ("prehashId" -> buildProviderBaseId()(data)) ~ ("dplaId" -> mintDplaId(data))

  // Helper method

  /** The agent returned by `provider` for every record of this mapping. */
  def agent: EdmAgent = EdmAgent(
    name = Some("Mountain West Digital Library"),
    uri = Some(URI("http://dp.la/api/contributor/mwdl"))
  )
}