Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BITS-enabled JATS reader #9138

Merged
merged 8 commits into from Oct 26, 2023
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
86 changes: 72 additions & 14 deletions src/Text/Pandoc/Readers/JATS.hs
Expand Up @@ -165,8 +165,10 @@ parseBlock (CRef x) = return $ plain $ str $ T.toUpper x
parseBlock (Elem e) = do
sectionLevel <- gets jatsSectionLevel
let parseBlockWithHeader = wrapWithHeader (sectionLevel+1) (getBlocks e)

case qName (elName e) of
"book" -> parseBook
"book-part-wrapper" -> parseBook
"p" -> parseMixed para (elContent e)
"code" -> codeBlockWithLang
"preformat" -> codeBlockWithLang
Expand Down Expand Up @@ -203,6 +205,7 @@ parseBlock (Elem e) = do
"article-meta" -> parseMetadata e
"custom-meta" -> parseMetadata e
"processing-meta" -> return mempty
"book-meta" -> parseMetadata e
"title" -> return mempty -- processed by header
"label" -> return mempty -- processed by header
"table" -> parseTable
Expand All @@ -225,6 +228,19 @@ parseBlock (Elem e) = do
then blockFormula displayMath e
else divWith (attrValue "id" e, ["disp-formula"], [])
<$> getBlocks e
"index" -> parseBlockWithHeader
"index-div" -> parseBlockWithHeader
"index-group" -> parseBlockWithHeader
"index-title-group" -> return mempty -- handled by index and index-div
"toc" -> parseBlockWithHeader
"toc-div" -> parseBlockWithHeader
"toc-entry" -> parseBlockWithHeader
"toc-group" -> parseBlockWithHeader
"toc-title-group" -> return mempty -- handled by toc
"legend" -> parseBlockWithHeader
"dedication" -> parseBlockWithHeader
"foreword" -> parseBlockWithHeader
"preface" -> parseBlockWithHeader
"?xml" -> return mempty
_ -> getBlocks e
where parseMixed container conts = do
Expand Down Expand Up @@ -370,36 +386,53 @@ parseBlock (Elem e) = do
parseElement = filterChildren isEntry
wrapWithHeader n mBlocks = do
isBook <- gets jatsBook
let n' = if isBook || n == 0 then n + 1 else n
let n' = case (filterChild (named "title") e >>= maybeAttrValue "display-as") of
Just t -> read $ T.unpack t
Nothing -> if isBook || n == 0 then n + 1 else n
headerText <- case filterChild (named "title") e of
Just t -> getInlines t
Nothing -> return mempty
Just t -> case maybeAttrValue "supress" t of
Just s -> if s == "no"
then getInlines t
else return mempty
Nothing -> getInlines t
Nothing -> do
let name = qName (elName e)
if (name == "dedication" || name == "foreword" || name == "preface")
then return $ str $ T.toTitle name
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be nice to use localized (translated) versions of these names rather than the English.
However, we don't currently have localizations for these, so this will have to wait.

Copy link
Contributor Author

@kamoe kamoe Oct 26, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. The rationale is that, if there are non-English language named sections, then these will probably have a <title>, captured in the case just above. But might still be something to consider in the future.

else case filterChild (named "index-title-group") e >>= filterChild (named "title") of
Just i -> getInlines i
Nothing -> case filterChild (named "toc-title-group") e >>= filterChild (named "title") of
Just t -> getInlines t
Nothing -> return mempty
oldN <- gets jatsSectionLevel
modify $ \st -> st{ jatsSectionLevel = n }
blocks <- mBlocks
let ident = attrValue "id" e
modify $ \st -> st{ jatsSectionLevel = oldN }
return $ (if
headerText == mempty
then mempty
else headerWith (ident,[],[]) n' headerText) <> blocks
return $ (if headerText == mempty
then mempty
else headerWith (ident,[],[]) n' headerText) <> blocks
parseBook = do
modify $ \st -> st{ jatsBook = True }
getBlocks e

getInlines :: PandocMonad m => Element -> JATS m Inlines
getInlines e' = trimInlines . mconcat <$>
mapM parseInline (elContent e')

parseMetadata :: PandocMonad m => Element -> JATS m Blocks
parseMetadata e = do
getTitle e
getAuthors e
isBook <- gets jatsBook
if isBook then getBookTitle e else getArticleTitle e
if isBook then getBookAuthors e else getArticleAuthors e
getAffiliations e
getAbstract e
getPubDate e
getPermissions e
return mempty

getTitle :: PandocMonad m => Element -> JATS m ()
getTitle e = do
getArticleTitle :: PandocMonad m => Element -> JATS m ()
getArticleTitle e = do
tit <- case filterElement (named "article-title") e of
Just s -> getInlines s
Nothing -> return mempty
Expand All @@ -410,8 +443,21 @@ getTitle e = do
when (tit /= mempty) $ addMeta "title" tit
when (subtit /= mempty) $ addMeta "subtitle" subtit

getAuthors :: PandocMonad m => Element -> JATS m ()
getAuthors e = do

getBookTitle :: PandocMonad m => Element -> JATS m ()
getBookTitle e = do
tit <- case (filterElement (named "book-title-group") e >>= filterElement (named "book-title")) of
Just s -> getInlines s
Nothing -> return mempty
subtit <- case (filterElement (named "book-title-group") e >>= filterElement (named "subtitle")) of
Just s -> (text ": " <>) <$>
getInlines s
Nothing -> return mempty
when (tit /= mempty) $ addMeta "title" tit
when (subtit /= mempty) $ addMeta "subtitle" subtit

getArticleAuthors :: PandocMonad m => Element -> JATS m ()
getArticleAuthors e = do
authors <- mapM getContrib $ filterElements
(\x -> named "contrib" x &&
attrValue "contrib-type" x == "author") e
Expand All @@ -422,6 +468,18 @@ getAuthors e = do
(a:as, ns) -> reverse as ++ [a <> mconcat ns]
unless (null authors) $ addMeta "author" authors'

getBookAuthors :: PandocMonad m => Element -> JATS m ()
getBookAuthors e = do
authors <- mapM getContrib $ filterElements (\x -> named "contrib-group" x) e
>>= filterElements (\x -> named "contrib" x &&
attrValue "contrib-type" x == "author")
authorNotes <- mapM getInlines $ filterElements (named "author-notes") e
let authors' = case (reverse authors, authorNotes) of
([], _) -> []
(_, []) -> authors
(a:as, ns) -> reverse as ++ [a <> mconcat ns]
unless (null authors) $ addMeta "author" authors'

getAffiliations :: PandocMonad m => Element -> JATS m ()
getAffiliations x = do
affs <- mapM getInlines $ filterChildren (named "aff") x
Expand Down
19 changes: 19 additions & 0 deletions test/command/8867.md
@@ -0,0 +1,19 @@
```
% pandoc -f jats -t native
<article-meta>
<title-group>
<article-title></article-title>
</title-group>
<permissions>
<copyright-statement>© 2023, Ellerman et al</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Ellerman et al</copyright-holder>
<license license-type="open-access">
<ali:license_ref xmlns:ali="http://www.niso.org/schemas/ali/1.0/">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This document is distributed under a Creative Commons Attribution 4.0 International license.</license-p>
</license>
</permissions>
</article-meta>
^D
[]
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this what you intended? Shouldn't this test be run with -s so we get the metadata?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Of course. Just added this, plus a few additional tests for metadata in both book and book-part-wrapper cases. Thanks for pointing out!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also separate tests from when we just want to see content, not metadata.

```
175 changes: 175 additions & 0 deletions test/command/bits-title-display-as.md
@@ -0,0 +1,175 @@
```
% pandoc -f jats -t native
<sec>
<title>THE EUROPEAN UNION EXPLAINED</title>
</sec>
^D
[ Header
1
( "" , [] , [] )
[ Str "THE"
, Space
, Str "EUROPEAN"
, Space
, Str "UNION"
, Space
, Str "EXPLAINED"
]
]
```

```
% pandoc -f jats -t native
<sec>
<title display-as="3">THE EUROPEAN UNION EXPLAINED</title>
</sec>
^D
[ Header
3
( "" , [] , [] )
[ Str "THE"
, Space
, Str "EUROPEAN"
, Space
, Str "UNION"
, Space
, Str "EXPLAINED"
]
]
```

```
% pandoc -f jats -t native
<body>
<sec>
<title>The European Parliament</title>
<p>Members of the European Parliament (MEPs) are directly elected by EU citizens.</p>
<sec>
<title display-as="3">Composition of the European Parliament</title>
<p>The seats in the European Parliament are allocated among the Member States.</p>
</sec>
<sec>
<title>Composition of the European Parliament - II </title>
<p>Most MEPs are associated with a national political party in their home country.</p>
</sec>
</sec>
</body>
^D
[ Header
1
( "" , [] , [] )
[ Str "The"
, Space
, Str "European"
, Space
, Str "Parliament"
]
, Para
[ Str "Members"
, Space
, Str "of"
, Space
, Str "the"
, Space
, Str "European"
, Space
, Str "Parliament"
, Space
, Str "(MEPs)"
, Space
, Str "are"
, Space
, Str "directly"
, Space
, Str "elected"
, Space
, Str "by"
, Space
, Str "EU"
, Space
, Str "citizens."
]
, Header
3
( "" , [] , [] )
[ Str "Composition"
, Space
, Str "of"
, Space
, Str "the"
, Space
, Str "European"
, Space
, Str "Parliament"
]
, Para
[ Str "The"
, Space
, Str "seats"
, Space
, Str "in"
, Space
, Str "the"
, Space
, Str "European"
, Space
, Str "Parliament"
, Space
, Str "are"
, Space
, Str "allocated"
, Space
, Str "among"
, Space
, Str "the"
, Space
, Str "Member"
, Space
, Str "States."
]
, Header
2
( "" , [] , [] )
[ Str "Composition"
, Space
, Str "of"
, Space
, Str "the"
, Space
, Str "European"
, Space
, Str "Parliament"
, Space
, Str "-"
, Space
, Str "II"
]
, Para
[ Str "Most"
, Space
, Str "MEPs"
, Space
, Str "are"
, Space
, Str "associated"
, Space
, Str "with"
, Space
, Str "a"
, Space
, Str "national"
, Space
, Str "political"
, Space
, Str "party"
, Space
, Str "in"
, Space
, Str "their"
, Space
, Str "home"
, Space
, Str "country."
]
]
```