Skip to content

Commit

Permalink
JATS reader: fix handling of alt-text (#9134)
Browse files Browse the repository at this point in the history
Previously we were looking for an attribute that doesn't exist in
JATS; alt-text is provided by a child element.

Closes #9130.
  • Loading branch information
kamoe committed Oct 17, 2023
1 parent 802c610 commit fa3513b
Show file tree
Hide file tree
Showing 6 changed files with 121 additions and 30 deletions.
20 changes: 9 additions & 11 deletions src/Text/Pandoc/Readers/JATS.hs
Original file line number Diff line number Diff line change
Expand Up @@ -138,18 +138,16 @@ trimNl = T.dropAround (== '\n')

-- Shared helper that builds the image for both graphic (in parseBlock)
-- and inline-graphic (in parseInline).
getGraphic :: PandocMonad m
=> Maybe (Inlines, Text) -> Element -> JATS m Inlines
getGraphic mbfigdata e = do
getGraphic :: PandocMonad m => Element -> JATS m Inlines
getGraphic e = do
let atVal a = attrValue a e
(ident, title, capt) =
case mbfigdata of
Just (capt', i) -> (i, "fig:" <> atVal "title", capt')
Nothing -> (atVal "id", atVal "title",
text (atVal "alt-text"))
let altText = case filterElement (named "alt-text") e of
Just alt -> textContent alt
Nothing -> mempty
(ident, title, altText') = (atVal "id", atVal "title", text altText)
attr = (ident, T.words $ atVal "role", [])
imageUrl = atVal "href"
return $ imageWith attr imageUrl title capt
return $ imageWith attr imageUrl title altText'

getBlocks :: PandocMonad m => Element -> JATS m Blocks
getBlocks e = mconcat <$>
Expand Down Expand Up @@ -198,7 +196,7 @@ parseBlock (Elem e) = do
"table-wrap-foot" -> parseBlockWithHeader
"trans-abstract" -> parseBlockWithHeader
"verse-group" -> parseBlockWithHeader
"graphic" -> para <$> getGraphic Nothing e
"graphic" -> para <$> getGraphic e
"journal-meta" -> parseMetadata e
"article-meta" -> parseMetadata e
"custom-meta" -> parseMetadata e
Expand Down Expand Up @@ -622,7 +620,7 @@ parseInline (Elem e) =
"code" -> codeWithLang
"monospace" -> codeWithLang

"inline-graphic" -> getGraphic Nothing e
"inline-graphic" -> getGraphic e
"disp-quote" -> do
qt <- gets jatsQuoteType
let qt' = if qt == SingleQuote then DoubleQuote else SingleQuote
Expand Down
36 changes: 36 additions & 0 deletions test/Tests/Readers/JATS.hs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,42 @@ tests = [ testGroup "inline code"
, testGroup "images"
[ test jats "basic" $ "<graphic mimetype=\"image\" mime-subtype=\"\" xlink:href=\"/url\" xlink:title=\"title\" />"
=?> para (image "/url" "title" mempty)
, test jats "alt-text" $
"<graphic id=\"graphic001\"\n\
\ xlink:href=\"https://lh3.googleusercontent.com/dB7iirJ3ncQaVMBGE2YX-WCeoAVIChb6NAzoFcKCFChMsrixJvD7ZRbvcaC-ceXEzXYaoH4K5vaoRDsUyBHFkpIDPnsn3bnzovbvi0a2Gg=s660\"\n\
\ xlink:title=\"This is the title of the graphic\"\n\
\ xlink:role=\"This is the role of the graphic\">\n\
\ <alt-text>Alternative text of the graphic</alt-text>\n\
\ <caption>\n\
\ <title>This is the title of the caption</title>\n\
\ <p>Google doodle from 14 March 2003</p></caption>\n\
\ </graphic>"
=?> Para [ Image
( "graphic001"
, [ "This"
, "is"
, "the"
, "role"
, "of"
, "the"
, "graphic"
]
, []
)
[ Str "Alternative"
, Space
, Str "text"
, Space
, Str "of"
, Space
, Str "the"
, Space
, Str "graphic"
]
( "https://lh3.googleusercontent.com/dB7iirJ3ncQaVMBGE2YX-WCeoAVIChb6NAzoFcKCFChMsrixJvD7ZRbvcaC-ceXEzXYaoH4K5vaoRDsUyBHFkpIDPnsn3bnzovbvi0a2Gg=s660"
, "This is the title of the graphic"
)
]
]
, test jats "bullet list" $
"<list list-type=\"bullet\">\n\
Expand Down
8 changes: 6 additions & 2 deletions test/command/5321.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
<caption>
<p>bar</p>
</caption>
<graphic xlink:href="foo.png" xlink:alt-text="baz" />
<graphic xlink:href="foo.png">
<alt-text>baz</alt-text>
</graphic>
</fig>
^D
[ Figure
Expand All @@ -23,7 +25,9 @@
<title>foo</title>
<p>bar</p>
</caption>
<graphic xlink:href="foo.png" xlink:alt-text="baz" />
<graphic xlink:href="foo.png">
<alt-text>baz</alt-text>
</graphic>
</fig>
^D
[ Figure
Expand Down
7 changes: 5 additions & 2 deletions test/command/jats-figure-alt-text.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@
<p>bar</p>
</caption>
<alt-text>alternative-decription</alt-text>
<graphic xlink:href="foo.png" xlink:alt-text="baz" />
<graphic xlink:href="foo.png">
<alt-text>baz</alt-text>
</graphic>
</fig>
^D
[ Figure
( "fig-1" , [] , [] )
Expand All @@ -16,4 +19,4 @@
[ Image ( "" , [] , [] ) [ Str "baz" ] ( "foo.png" , "" ) ]
]
]
```
```
46 changes: 38 additions & 8 deletions test/jats-reader.native
Original file line number Diff line number Diff line change
Expand Up @@ -745,6 +745,8 @@ Pandoc
, Para
[ Str "Einstein"
, Space
, Str "definitely"
, Space
, Str "showed"
, Space
, Str "that"
Expand All @@ -754,10 +756,29 @@ Pandoc
[ Para [ Str "Abstract" , Space , Str "text" ]
, Para
[ Image
( "" , [] , [] )
[]
( "graphic001"
, [ "This"
, "is"
, "the"
, "role"
, "of"
, "the"
, "graphic"
]
, []
)
[ Str "Alternative"
, Space
, Str "text"
, Space
, Str "of"
, Space
, Str "the"
, Space
, Str "graphic"
]
( "https://lh3.googleusercontent.com/dB7iirJ3ncQaVMBGE2YX-WCeoAVIChb6NAzoFcKCFChMsrixJvD7ZRbvcaC-ceXEzXYaoH4K5vaoRDsUyBHFkpIDPnsn3bnzovbvi0a2Gg=s660"
, ""
, "This is the title of the graphic"
)
]
, Para [ Math DisplayMath "e=mc^2" ]
Expand Down Expand Up @@ -802,11 +823,14 @@ Pandoc
[ Para [ Str "Abstract" , Space , Str "text" ]
, Para
[ Image
( "" , [] , [] )
[]
( "https://lh3.googleusercontent.com/dB7iirJ3ncQaVMBGE2YX-WCeoAVIChb6NAzoFcKCFChMsrixJvD7ZRbvcaC-ceXEzXYaoH4K5vaoRDsUyBHFkpIDPnsn3bnzovbvi0a2Gg=s660"
, ""
)
( "graphic003" , [] , [] )
[ Str "Alternative"
, Space
, Str "text"
, Space
, Str "1"
]
( "Title 3" , "" )
]
, Para [ Math DisplayMath "e=mc^2" ]
]
Expand All @@ -821,6 +845,12 @@ Pandoc
, Space
, Str "because\8230"
]
, Figure
( "fig-1" , [] , [] )
(Caption Nothing [ Plain [ Str "bar" ] ])
[ Plain [ Str "alternative-decription" ]
, Para [ Image ( "" , [] , [] ) [] ( "foo.png" , "" ) ]
]
, Header
1
( "code-blocks" , [] , [] )
Expand Down
34 changes: 27 additions & 7 deletions test/jats-reader.xml
Original file line number Diff line number Diff line change
Expand Up @@ -193,19 +193,26 @@
<title>Formulae with miscelaneus elements</title>
<p>Regardless of whether or not they are inside a paragraph, should be wrapped in a div, and displayed in a block:</p>
<p><bold>Inside a paragraph:</bold></p>
<p>Einstein showed that
<p>Einstein definitely showed that
<disp-formula>
<abstract>
<p>Abstract text</p>
</abstract>
<graphic xlink:href="https://lh3.googleusercontent.com/dB7iirJ3ncQaVMBGE2YX-WCeoAVIChb6NAzoFcKCFChMsrixJvD7ZRbvcaC-ceXEzXYaoH4K5vaoRDsUyBHFkpIDPnsn3bnzovbvi0a2Gg=s660">
<alt-text>Alternative text 1</alt-text>
<caption><p>Google doodle from 14 March 2003</p></caption>
<graphic id="graphic001"
xlink:href="https://lh3.googleusercontent.com/dB7iirJ3ncQaVMBGE2YX-WCeoAVIChb6NAzoFcKCFChMsrixJvD7ZRbvcaC-ceXEzXYaoH4K5vaoRDsUyBHFkpIDPnsn3bnzovbvi0a2Gg=s660"
xlink:title="This is the title of the graphic"
xlink:role="This is the role of the graphic">
<alt-text>Alternative text of the graphic</alt-text>
<caption>
<title>This is the title of the caption</title>
<p>Google doodle from 14 March 2003</p></caption>
</graphic>
<alternatives>
<tex-math><![CDATA[e=mc^2]]></tex-math>
<mml:math display="block" xmlns:mml="http://www.w3.org/1998/Math/MathML"><mml:mrow><mml:mi>e</mml:mi><mml:mo>=</mml:mo><mml:mi>m</mml:mi><mml:msup><mml:mi>c</mml:mi><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:math>
<graphic xlink:href="https://lh3.googleusercontent.com/dB7iirJ3ncQaVMBGE2YX-WCeoAVIChb6NAzoFcKCFChMsrixJvD7ZRbvcaC-ceXEzXYaoH4K5vaoRDsUyBHFkpIDPnsn3bnzovbvi0a2Gg=s660">
<graphic id="graphic001"
xlink:href="https://lh3.googleusercontent.com/dB7iirJ3ncQaVMBGE2YX-WCeoAVIChb6NAzoFcKCFChMsrixJvD7ZRbvcaC-ceXEzXYaoH4K5vaoRDsUyBHFkpIDPnsn3bnzovbvi0a2Gg=s660"
xlink:title="Title 2">
<alt-text>Alternative text 2</alt-text>
<caption><p>Google doodle from 14 March 2003</p></caption>
</graphic>
Expand All @@ -218,14 +225,18 @@
<abstract>
<p>Abstract text</p>
</abstract>
<graphic xlink:href="https://lh3.googleusercontent.com/dB7iirJ3ncQaVMBGE2YX-WCeoAVIChb6NAzoFcKCFChMsrixJvD7ZRbvcaC-ceXEzXYaoH4K5vaoRDsUyBHFkpIDPnsn3bnzovbvi0a2Gg=s660">
<graphic id="graphic003"
xlink:href="https://lh3.googleusercontent.com/dB7iirJ3ncQaVMBGE2YX-WCeoAVIChb6NAzoFcKCFChMsrixJvD7ZRbvcaC-ceXEzXYaoH4K5vaoRDsUyBHFkpIDPnsn3bnzovbvi0a2Gg=s660"
xlink:href="Title 3">
<alt-text>Alternative text 1</alt-text>
<caption><p>Google doodle from 14 March 2003</p></caption>
</graphic>
<alternatives>
<tex-math><![CDATA[e=mc^2]]></tex-math>
<mml:math display="block" xmlns:mml="http://www.w3.org/1998/Math/MathML"><mml:mrow><mml:mi>e</mml:mi><mml:mo>=</mml:mo><mml:mi>m</mml:mi><mml:msup><mml:mi>c</mml:mi><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:math>
<graphic xlink:href="https://lh3.googleusercontent.com/dB7iirJ3ncQaVMBGE2YX-WCeoAVIChb6NAzoFcKCFChMsrixJvD7ZRbvcaC-ceXEzXYaoH4K5vaoRDsUyBHFkpIDPnsn3bnzovbvi0a2Gg=s660">
<graphic id="graphic004"
xlink:href="https://lh3.googleusercontent.com/dB7iirJ3ncQaVMBGE2YX-WCeoAVIChb6NAzoFcKCFChMsrixJvD7ZRbvcaC-ceXEzXYaoH4K5vaoRDsUyBHFkpIDPnsn3bnzovbvi0a2Gg=s660"
xlink:href="Title 4">
<alt-text>Alternative text 2</alt-text>
<caption><p>Google doodle from 14 March 2003</p></caption>
</graphic>
Expand All @@ -234,6 +245,15 @@
<p>This formula is important because…</p>
</sec>
</sec>
<sec id="figures">
<fig id="fig-1">
<caption>
<p>bar</p>
</caption>
<alt-text>alternative-decription</alt-text>
<graphic xlink:href="foo.png" xlink:alt-text="baz" />
</fig>
</sec>
<sec id="code-blocks">
<title>Code Blocks</title>
<p>Code:</p>
Expand Down

0 comments on commit fa3513b

Please sign in to comment.