/
jats.cljc
139 lines (125 loc) · 5.84 KB
/
jats.cljc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
(ns bartleby.language.jats
(:require [clojure.string :as str]
[clojure.zip :as zip]
[bartleby.util :refer [split-fullname]]
[bartleby.language.tex :as tex]
[bartleby.bibliography] ; otherwise cloverage breaks the AsElements protocol extensions
[clojure.data.xml :as xml :refer [element]]
[clojure.data.xml.protocols :refer [AsElements as-elements]])
(:import (bartleby.bibliography Field Reference Gloss)))
(def ^:private public-identifier "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1 20151215//EN")
(def ^:private system-identifier "https://jats.nlm.nih.gov/publishing/1.1/JATS-journalpublishing1.dtd")
(def ^:private doctype (format "<!DOCTYPE article PUBLIC \"%s\" \"%s\">" public-identifier system-identifier))
(defn- wrap
"Wrap the string s in left and right padding"
([s both] (wrap s both both))
([s left right] (str left s right)))
(defn- xml-name
"Sanitize the string s into a valid XML name.
* Prefix with underscore if the first character is not a valid first character.
* Remove any non- letter/number/some punctuation characters."
[s]
(-> s
(str/replace #"^[^A-Za-z_:]" "_$0")
(str/replace #"[^A-Za-z0-9._:-]" "")))
(defn- as-name-element
[fullname]
(let [[given-names surname] (split-fullname fullname)]
(element :name {}
(element :surname {} surname)
(element :given-names {} given-names))))
(defn- as-name-elements
[bibtexnames]
(->> (str/split bibtexnames #"\s+and\s+")
(map str/trim)
(map as-name-element)))
(defn- as-fpage-lpage-elements
[pages]
; split on hyphens, n-dashes, or m-dashes
(map #(element %1 {} %2) [:fpage :lpage] (str/split pages #"[-–—]+" 2)))
(defn- create-comment
"pad content with spaces and escape contents if needed"
[content]
(-> content
(str/trim)
(str/replace #"-{2,}" "–") ; replace any sequences of multiple hyphens with a single n-dash
(wrap " ")
(xml/xml-comment)))
; mapping from keywordified Field. :key values to (fn [value] ...element(s)...)
(def ^:private field-mapping {:address #(element :publisher-loc {} %)
:author #(element :person-group {:person-group-type "author"} (as-name-elements %))
:booktitle #(element :source {} %)
:day #(element :day {} %)
:doi #(element :pub-id {:pub-id-type "doi"} %)
:edition #(element :edition {} %)
:editor #(element :person-group {:person-group-type "editor"} (as-name-elements %))
:institution #(element :institution {} %)
:isbn #(element :isbn {} %)
:issn #(element :issn {} %)
:issue #(element :issue {} %) ; "issue" is not a legit BibTeX field but whatever
:journal #(element :source {} %)
:month #(element :month {} %)
:note #(element :comment {} %)
:number #(element :issue {} %)
:page #(as-fpage-lpage-elements %) ; "page" should be "pages" but why not
:pages #(as-fpage-lpage-elements %)
:publisher #(element :publisher-name {} %)
:school #(element :institution {} %)
:series #(element :series {} %)
:title #(element :article-title {} %)
:url #(element :uri {} %)
:volume #(element :volume {} %)
:year #(element :year {} %)})
(extend-protocol AsElements
Field
(as-elements [{:keys [key value]}]
(let [key-keyword (keyword (str/lower-case key))
value-string (-> value tex/simplify tex/write-str)]
(list (if-let [value-element (get field-mapping key-keyword)]
(value-element value-string)
(create-comment (str key " = " value-string))))))
Reference
(as-elements [{:keys [pubtype citekey fields]}]
(list (element :ref {:id (xml-name citekey)}
(element :element-citation {:publication-type pubtype}
(as-elements fields)))))
Gloss
(as-elements [{:keys [lines]}]
(list (create-comment (str/join \newline lines)))))
(defn- find-or-append
"Go through the children of `parent` (a zipper location) in order and find
the first loc that matches pred, or insert not-found and return its loc.
We have to use the parent, not the first child, since the element may be empty."
[parent pred not-found]
(loop [loc (zip/down parent)]
(if loc
(if (pred loc)
loc ; found!
(recur (zip/right loc))) ; keep going
; otherwise, we're at the end with no match; append and go into the new loc
(-> parent (zip/append-child not-found) zip/down zip/rightmost))))
(defn- loc-tag=
"Returns a predicate that takes a zipper loc and returns true
if that loc's value's tag is equal to `tag`"
[tag]
(fn tag=?
[loc]
(= tag (:tag (zip/node loc)))))
(defn set-article-refs
"Find the /article/back/ref-list element in article
and add all of `entries` as refs (by converting with as-elements) to it"
[article entries]
(-> (or article (element :article {:dtd-version "1.1"}))
(zip/xml-zip)
(find-or-append (loc-tag= :back) (element :back {}))
(find-or-append (loc-tag= :ref-list) (element :ref-list {}))
(zip/replace (xml/element* :ref-list {} (as-elements entries)))
(zip/root)))
(defn write-str
"Generate XML string with JATS doctype"
[e]
(xml/emit-str e :encoding "UTF-8" :doctype doctype))
(defn write
"Write JATS XML with JATS doctype"
[e ^java.io.Writer writer]
(xml/emit e writer :encoding "UTF-8" :doctype doctype))