/
item.go
49 lines (43 loc) · 1.45 KB
/
item.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
// Copyright (c) 2021 Andrew Archibald
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package ia
import (
"encoding/xml"
"os"
"path/filepath"
)
// ItemMeta mirrors the metadata document of an item: the *_meta.xml file
// stored in the item's root directory. Every field is populated from the
// child element named in its xml struct tag.
type ItemMeta struct {
	Identifier string `xml:"identifier"`

	// Collections holds one entry per <collection> element.
	Collections []string `xml:"collection"`

	Description string `xml:"description"`

	// Mediatype is a value such as "software".
	Mediatype string `xml:"mediatype"`

	Subject  string `xml:"subject"`
	Title    string `xml:"title"`
	Uploader string `xml:"uploader"`

	// Publicdate and Addeddate use the "2006-01-02 15:04:05" format.
	Publicdate string `xml:"publicdate"`
	Addeddate  string `xml:"addeddate"`

	Curation string `xml:"curation"`

	// BackupLocation was removed from meta in April 2020.
	BackupLocation string `xml:"backup_location"`
}
// ReadItemMeta reads and decodes the metadata file of the item stored in
// dir. The file is looked up at <dir>/<name>_meta.xml, where <name> is the
// final path element of dir.
//
// dir is resolved to an absolute path before its final element is taken, so
// relative spellings such as ".", "" or "items/foo/.." name the directory
// they actually refer to instead of yielding a file name like "._meta.xml".
//
// An error is returned if dir cannot be resolved, the file cannot be
// opened, or its contents cannot be decoded as XML. On error the returned
// *ItemMeta is nil.
func ReadItemMeta(dir string) (*ItemMeta, error) {
	abs, err := filepath.Abs(dir)
	if err != nil {
		return nil, err
	}
	name := filepath.Base(abs) + "_meta.xml"

	// Open relative to the caller-supplied dir (not abs) so that the path
	// reported in any *os.PathError matches what the caller passed in.
	f, err := os.Open(filepath.Join(dir, name))
	if err != nil {
		return nil, err
	}
	defer f.Close()

	var meta ItemMeta
	if err := xml.NewDecoder(f).Decode(&meta); err != nil {
		return nil, err
	}
	return &meta, nil
}
// TimestampFormat is a time layout string in Go's reference-time notation:
// year, month, day, hour, minute and second as 14 digits with no separators.
// NOTE(review): presumably the layout of the timestamp passed to PageURL —
// confirm against callers.
const TimestampFormat = "20060102150405"
// PageURL builds a web.archive.org address for url at the given timestamp.
// The result has the form
//
//	https://web.archive.org/web/<timestamp>id_/<url>
//
// Both arguments are inserted verbatim; no escaping or validation is done.
func PageURL(url, timestamp string) string {
	const prefix = "https://web.archive.org/web/"
	// NOTE(review): the "id_" suffix on the timestamp is understood to be a
	// Wayback Machine modifier — confirm its exact meaning upstream.
	snapshot := timestamp + "id_"
	return prefix + snapshot + "/" + url
}