-
Notifications
You must be signed in to change notification settings - Fork 8
/
nodes.go
303 lines (260 loc) · 7.58 KB
/
nodes.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
// Copyright 2017 Delving B.V.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ead
import (
"fmt"
"os"
"strings"
"github.com/rs/zerolog/log"
"github.com/delving/hub3/config"
"github.com/delving/hub3/hub3/fragments"
r "github.com/kiivihal/rdf2go"
)
// FragmentGraphDocType is the document type label for EAD nodes.
// NOTE(review): Node.FragmentGraph in this file sets the header DocType from
// fragments.FragmentGraphDocType, not from this constant — confirm which
// callers rely on this value.
const FragmentGraphDocType = "ead"
// CLevelLeader is the prefix put in front of a node path when building the
// cLevel identifiers stored on the tree (see CreateTree and the branch helpers).
const CLevelLeader = "@"
// Node holds all the clevel information.
// FragmentGraph, CreateTree and Triples read the fields below to build the
// fragment graph, the tree entry and the RDF triples for the node.
type Node struct {
// CTag is emitted by Triples as the "cLevel" literal.
CTag string
// Depth is not read in this part of the file; CreateTree derives the tree
// depth from ParentIDs instead. Presumably the nesting level — TODO confirm.
Depth int32
// Type is copied to Tree.Type and emitted as the "cType" literal.
Type string
// SubType is emitted as the "cSubtype" literal.
SubType string
// Header carries the descriptive data of the node (label, inventory
// number, digital object information). It is dereferenced without a nil
// check by FragmentGraph, CreateTree and Triples.
Header *Header
// Nodes presumably holds the nested child nodes — TODO confirm.
Nodes []*Node
// Children is copied to Tree.ChildCount.
Children int
// Order is used as Tree.SortKey and as NodeEntry.Order.
Order uint64
// ParentIDs determines the tree depth: Tree.Depth is len(ParentIDs) + 1.
ParentIDs []string
// Path identifies the node. It is used to build the subject URI, the
// HubID (with "/" replaced by "-") and, split on pathSep, the branch
// identifiers.
Path string
// BranchID is emitted as the "branchID" literal.
BranchID string
// AccessRestrict is copied to Tree.Access; a non-empty value marks the
// tree entry as restricted.
AccessRestrict string
AccessRestrictYear string
Material string
Phystech []string
PhystechType string
// triples are the triples collected through AppendTriple. Triples returns
// them first, and CreateTree derives Tree.RawContent from them.
triples []*r.Triple
}
// NodeList groups a list of Nodes together with a type and labels.
// NOTE(review): none of its fields are read in this part of the file; the
// exact meaning of Type and Label should be confirmed against the parser.
type NodeList struct {
Type string
Label []string
Nodes []*Node
}
// Header holds the descriptive information of a Node.
// Only the usages visible in this file are documented on the fields.
type Header struct {
Type string
// InventoryNumber is copied to Tree.UnitID.
InventoryNumber string
ID []*NodeID
Label []string
Date []*NodeDate
// Physdesc is copied to Tree.PhysDesc.
Physdesc string
Physloc string
DateAsLabel bool
// HasDigitalObject is copied to Tree.HasDigitalObject and decides whether
// CreateTree writes a dao configuration and processes the digital objects.
HasDigitalObject bool
// DaoLink is copied to Tree.DaoLink.
DaoLink string
AltRender string
// Genreform is emitted as the "genreform" literal and used as the single
// filter type of the dao configuration.
Genreform string
Attridentifier string
}
// NodeDate describes a single date entry of a Header (see Header.Date).
// NOTE(review): the fields are not read in this part of the file; they
// presumably mirror the attributes and text of an EAD date element — confirm.
type NodeDate struct {
Calendar string
Era string
Normal string
Label string
Type string
}
// NodeID describes a single identifier entry of a Header (see Header.ID).
// NOTE(review): the fields are not read in this part of the file; they
// presumably mirror the attributes and value of an EAD identifier — confirm.
type NodeID struct {
TypeID string
Type string
Audience string
ID string
}
// newSubject builds the subject URI for the archive entry identified by id.
// The URI has the shape <RDF.BaseURL>/<OrgID>/archive/<Spec>/<id>, where the
// base URL comes from the global configuration and OrgID and Spec from cfg.
func newSubject(cfg *NodeConfig, id string) string {
	// TODO(kiivihal): replace config option for RDF.BaseURL
	const layout = "%s/%s/archive/%s/%s"

	baseURL := config.Config.RDF.BaseURL

	return fmt.Sprintf(layout, baseURL, cfg.OrgID, cfg.Spec, id)
}
// getFirstBranch returns the cLevel identifier of the branch directly above
// the node: the node Path without its last segment, prefixed with
// CLevelLeader. It returns an empty string when the Path has fewer than two
// segments.
func (n *Node) getFirstBranch() string {
	segments := strings.Split(n.Path, pathSep)
	if len(segments) < 2 {
		return ""
	}

	branch := strings.Join(segments[:len(segments)-1], pathSep)

	return CLevelLeader + branch
}
// getSecondBranch returns the cLevel identifier of the branch two levels above
// the node: the node Path without its last two segments, prefixed with
// CLevelLeader. It returns an empty string when the Path has fewer than three
// segments.
func (n *Node) getSecondBranch() string {
	segments := strings.Split(n.Path, pathSep)
	if len(segments) < 3 {
		return ""
	}

	branch := strings.Join(segments[:len(segments)-2], pathSep)

	return CLevelLeader + branch
}
// FragmentGraph returns the archival node as a FragmentGraph together with the
// ResourceMap that holds its triples.
//
// It builds the fragment header (HubID is "<OrgID>_<Spec>_<Path>" with every
// "/" in the path replaced by "-"), collects the tags from cfg and the
// dataset tag map, sends a NodeEntry for the node on cfg.HubIDs, creates the
// tree entry through cfg.CreateTree and appends the node triples to the
// resource map in order. An error from appending a triple is returned as is,
// with nil graph and resource map.
func (n *Node) FragmentGraph(cfg *NodeConfig) (*fragments.FragmentGraph, *fragments.ResourceMap, error) {
	rm := fragments.NewEmptyResourceMap(cfg.OrgID)

	path := n.Path
	subject := n.GetSubject(cfg)
	hubID := fmt.Sprintf("%s_%s_%s", cfg.OrgID, cfg.Spec, strings.ReplaceAll(path, "/", "-"))

	meta := &fragments.Header{
		OrgID:         cfg.OrgID,
		Spec:          cfg.Spec,
		HubID:         hubID,
		DocType:       fragments.FragmentGraphDocType,
		EntryURI:      subject,
		NamedGraphURI: fmt.Sprintf("%s/graph", subject),
		Tags:          []string{"ead"},
		Modified:      fragments.NowInMillis(),
		Revision:      cfg.Revision,
	}

	// Appending an empty tag list leaves the tags unchanged, so no length
	// check is needed.
	meta.Tags = append(meta.Tags, cfg.Tags...)

	if datasetTags, found := config.Config.DatasetTagMap.Get(meta.Spec); found {
		meta.Tags = append(meta.Tags, datasetTags...)
	}

	entry := &NodeEntry{
		HubID: meta.HubID,
		Path:  path,
		Order: n.Order,
		Title: n.Header.GetTreeLabel(),
	}
	cfg.HubIDs <- entry

	// Create tree before FragmentGraph
	tree := cfg.CreateTree(cfg, n, meta.HubID, path)

	fg := fragments.NewFragmentGraph()
	fg.Meta = meta
	fg.Tree = tree

	triples := n.Triples(cfg)
	for i := range triples {
		if err := rm.AppendOrderedTriple(triples[i], false, i); err != nil {
			return nil, nil, err
		}
	}

	fg.SetResources(rm)

	return fg, rm, nil
}
// CreateTree builds the fragments.Tree entry for node n.
//
// hubID becomes the tree HubID and id (prefixed with CLevelLeader) its CLevel.
// The remaining tree fields are copied from the node and its header. The
// object values of the node's stored triples are collected in RawContent,
// except for the unitTitle, geogname, persname, datetext and dateiso
// predicates.
//
// When the header reports a digital object, a dao configuration is written to
// disk and, if cfg.DaoFn is set and processing is requested, cfg.DaoFn is run
// to fill the mime types and the digital object count. Failures are logged and
// do not prevent the tree from being returned.
func CreateTree(cfg *NodeConfig, n *Node, hubID string, id string) *fragments.Tree {
	tree := new(fragments.Tree)

	// identity and position in the hierarchy
	tree.HubID = hubID
	tree.CLevel = CLevelLeader + id
	tree.Leaf = n.getFirstBranch()
	tree.Parent = n.getSecondBranch()
	tree.Depth = len(n.ParentIDs) + 1
	tree.ChildCount = n.Children
	tree.SortKey = n.Order

	// descriptive fields
	tree.Type = n.Type
	tree.Label = n.Header.GetTreeLabel()
	tree.UnitID = n.Header.InventoryNumber
	tree.Periods = n.Header.GetPeriods()
	tree.PhysDesc = n.Header.Physdesc
	tree.Access = n.AccessRestrict
	tree.HasRestriction = n.AccessRestrict != ""

	// digital object fields; the slices are initialised empty, not nil
	tree.HasDigitalObject = n.Header.HasDigitalObject
	tree.DaoLink = n.Header.DaoLink
	tree.MimeTypes = []string{}
	tree.ManifestLink = ""

	tree.RawContent = []string{}
	for _, triple := range n.triples {
		switch triple.Predicate.RawValue() {
		case NewResource("unitTitle").RawValue(),
			NewResource("geogname").RawValue(),
			NewResource("persname").RawValue(),
			NewResource("datetext").RawValue(),
			NewResource("dateiso").RawValue():
			// these predicates are excluded from the raw content
		default:
			tree.RawContent = append(tree.RawContent, triple.Object.RawValue())
		}
	}

	if !tree.HasDigitalObject {
		return tree
	}

	daoCfg := newDaoConfig(cfg, tree)
	daoCfg.Source = SourceEad
	daoCfg.FilterTypes = []string{n.Header.Genreform}

	// must happen here because the check needs the daoCfg to not be written yet
	orphanedMets := daoCfg.hasOrphanedMetsFile()

	if err := daoCfg.Write(); err != nil {
		log.Error().Err(err).Msg("unable to write daocfg to disk")
	}

	if cfg.DaoFn == nil {
		return tree
	}

	// The METS file is only looked up when processing of missing files is
	// requested; any stat error counts as missing.
	metsMissing := false
	if cfg.ProcessDigitalIfMissing {
		if _, err := os.Stat(daoCfg.GetMetsFilePath()); err != nil {
			metsMissing = true
		}
	}

	if !cfg.ProcessDigital && !orphanedMets && !metsMissing {
		return tree
	}

	log.Debug().
		Str("archiveID", daoCfg.ArchiveID).
		Str("InventoryID", daoCfg.InventoryID).
		Str("uuid", daoCfg.UUID).
		Msg("force processing mets files")

	if err := cfg.DaoFn(&daoCfg); err != nil {
		log.Error().Err(err).
			Str("archiveID", daoCfg.ArchiveID).
			Str("InventoryID", daoCfg.InventoryID).
			Str("uuid", daoCfg.UUID).
			Str("url", daoCfg.Link).
			Msg("unable to process dao link")

		cfg.MetsCounter.AppendError(daoCfg.InventoryID, err.Error())

		return tree
	}

	tree.MimeTypes = daoCfg.MimeTypes
	tree.DOCount = daoCfg.ObjectCount

	return tree
}
// GetSubject returns the subject URI of the node. The URI is built by
// newSubject from cfg and the node Path.
func (n *Node) GetSubject(cfg *NodeConfig) string {
	return newSubject(cfg, n.Path)
}
// convert turns a string value into an RDF term. addNonEmptyTriple uses it to
// build the object of a triple; Triples passes r.NewLiteral for it.
type convert func(string) r.Term
// addNonEmptyTriple creates the triple (s, NewResource(p), oType(o)).
// It returns nil when the object value o is empty, so callers must check the
// result before using it.
func addNonEmptyTriple(s r.Term, p, o string, oType convert) *r.Triple {
	if len(o) == 0 {
		return nil
	}

	predicate := NewResource(p)
	object := oType(o)

	return r.NewTriple(s, predicate, object)
}
// Triples returns a list of triples created from an Archive Node.
//
// The result starts with the triples stored on the node (see AppendTriple),
// followed by the node-level literals cLevel, branchID, cType, cSubtype and
// genreform, and one periodDesc literal for every entry in cfg.PeriodDesc.
// Literals with an empty value are skipped. All added triples use the node
// subject (see GetSubject) as their subject.
//
// Appending never writes into the backing array of the node's stored
// triples, so repeated calls and later AppendTriple calls cannot overwrite
// elements of a slice that was returned earlier.
func (n *Node) Triples(cfg *NodeConfig) []*r.Triple {
	s := r.NewResource(n.GetSubject(cfg))

	// Cap the capacity at the current length: the first append below then
	// allocates a fresh backing array instead of reusing spare capacity that
	// is shared with n.triples.
	triples := n.triples[:len(n.triples):len(n.triples)]

	add := func(p, o string) {
		if triple := addNonEmptyTriple(s, p, o, r.NewLiteral); triple != nil {
			triples = append(triples, triple)
		}
	}

	add("cLevel", n.CTag)
	add("branchID", n.BranchID)
	add("cType", n.Type)
	add("cSubtype", n.SubType)
	add("genreform", n.Header.Genreform)

	for _, period := range cfg.PeriodDesc {
		add("periodDesc", period)
	}

	return triples
}
// AppendTriple adds triple to the triples stored on the node.
//
// A nil triple is ignored. Helpers such as addNonEmptyTriple return nil for
// empty values, and CreateTree dereferences every stored triple, so storing a
// nil entry would cause a panic later on.
func (n *Node) AppendTriple(triple *r.Triple) {
	if triple == nil {
		return
	}

	n.triples = append(n.triples, triple)
}