From fa3513b104e089a8969f3ae0dab98eba78b1a942 Mon Sep 17 00:00:00 2001 From: Julia Diaz Date: Tue, 17 Oct 2023 17:16:00 +0100 Subject: [PATCH] JATS reader: fix handling of alt-text (#9134) Previously we were looking for an attribute that doesn't exist in JATS; alt-text is provided by a child element. Closes #9130. --- src/Text/Pandoc/Readers/JATS.hs | 20 ++++++------ test/Tests/Readers/JATS.hs | 36 ++++++++++++++++++++++ test/command/5321.md | 8 +++-- test/command/jats-figure-alt-text.md | 7 +++-- test/jats-reader.native | 46 +++++++++++++++++++++++----- test/jats-reader.xml | 34 +++++++++++++++----- 6 files changed, 121 insertions(+), 30 deletions(-) diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs index fd90044af571..6cadef3069bf 100644 --- a/src/Text/Pandoc/Readers/JATS.hs +++ b/src/Text/Pandoc/Readers/JATS.hs @@ -138,18 +138,16 @@ trimNl = T.dropAround (== '\n') -- function that is used by both graphic (in parseBlock) -- and inline-graphic (in parseInline) -getGraphic :: PandocMonad m - => Maybe (Inlines, Text) -> Element -> JATS m Inlines -getGraphic mbfigdata e = do +getGraphic :: PandocMonad m => Element -> JATS m Inlines +getGraphic e = do let atVal a = attrValue a e - (ident, title, capt) = - case mbfigdata of - Just (capt', i) -> (i, "fig:" <> atVal "title", capt') - Nothing -> (atVal "id", atVal "title", - text (atVal "alt-text")) + let altText = case filterElement (named "alt-text") e of + Just alt -> textContent alt + Nothing -> mempty + (ident, title, altText') = (atVal "id", atVal "title", text altText) attr = (ident, T.words $ atVal "role", []) imageUrl = atVal "href" - return $ imageWith attr imageUrl title capt + return $ imageWith attr imageUrl title altText' getBlocks :: PandocMonad m => Element -> JATS m Blocks getBlocks e = mconcat <$> @@ -198,7 +196,7 @@ parseBlock (Elem e) = do "table-wrap-foot" -> parseBlockWithHeader "trans-abstract" -> parseBlockWithHeader "verse-group" -> parseBlockWithHeader - "graphic" -> 
para <$> getGraphic Nothing e + "graphic" -> para <$> getGraphic e "journal-meta" -> parseMetadata e "article-meta" -> parseMetadata e "custom-meta" -> parseMetadata e @@ -622,7 +620,7 @@ parseInline (Elem e) = "code" -> codeWithLang "monospace" -> codeWithLang - "inline-graphic" -> getGraphic Nothing e + "inline-graphic" -> getGraphic e "disp-quote" -> do qt <- gets jatsQuoteType let qt' = if qt == SingleQuote then DoubleQuote else SingleQuote diff --git a/test/Tests/Readers/JATS.hs b/test/Tests/Readers/JATS.hs index b7d36dc00a1f..7a49d5e12e6e 100644 --- a/test/Tests/Readers/JATS.hs +++ b/test/Tests/Readers/JATS.hs @@ -35,6 +35,42 @@ tests = [ testGroup "inline code" , testGroup "images" [ test jats "basic" $ "" =?> para (image "/url" "title" mempty) + , test jats "alt-text" $ + "\n\ + \ Alternative text of the graphic\n\ + \ \n\ + \ This is the title of the caption\n\ + \

Google doodle from 14 March 2003

\n\ + \
" + =?> Para [ Image + ( "graphic001" + , [ "This" + , "is" + , "the" + , "role" + , "of" + , "the" + , "graphic" + ] + , [] + ) + [ Str "Alternative" + , Space + , Str "text" + , Space + , Str "of" + , Space + , Str "the" + , Space + , Str "graphic" + ] + ( "https://lh3.googleusercontent.com/dB7iirJ3ncQaVMBGE2YX-WCeoAVIChb6NAzoFcKCFChMsrixJvD7ZRbvcaC-ceXEzXYaoH4K5vaoRDsUyBHFkpIDPnsn3bnzovbvi0a2Gg=s660" + , "This is the title of the graphic" + ) + ] ] , test jats "bullet list" $ "\n\ diff --git a/test/command/5321.md b/test/command/5321.md index 200cd6308c4c..8a665568faa3 100644 --- a/test/command/5321.md +++ b/test/command/5321.md @@ -4,7 +4,9 @@

bar

- + + baz + ^D [ Figure @@ -23,7 +25,9 @@ foo

bar

- + + baz + ^D [ Figure diff --git a/test/command/jats-figure-alt-text.md b/test/command/jats-figure-alt-text.md index 555d44f16aa7..999e7b9c2fed 100644 --- a/test/command/jats-figure-alt-text.md +++ b/test/command/jats-figure-alt-text.md @@ -5,8 +5,11 @@

bar

alternative-decription - + + baz + + ^D [ Figure ( "fig-1" , [] , [] ) @@ -16,4 +19,4 @@ [ Image ( "" , [] , [] ) [ Str "baz" ] ( "foo.png" , "" ) ] ] ] -``` +``` \ No newline at end of file diff --git a/test/jats-reader.native b/test/jats-reader.native index d1c36405def9..51048dd5a53f 100644 --- a/test/jats-reader.native +++ b/test/jats-reader.native @@ -745,6 +745,8 @@ Pandoc , Para [ Str "Einstein" , Space + , Str "definitely" + , Space , Str "showed" , Space , Str "that" @@ -754,10 +756,29 @@ Pandoc [ Para [ Str "Abstract" , Space , Str "text" ] , Para [ Image - ( "" , [] , [] ) - [] + ( "graphic001" + , [ "This" + , "is" + , "the" + , "role" + , "of" + , "the" + , "graphic" + ] + , [] + ) + [ Str "Alternative" + , Space + , Str "text" + , Space + , Str "of" + , Space + , Str "the" + , Space + , Str "graphic" + ] ( "https://lh3.googleusercontent.com/dB7iirJ3ncQaVMBGE2YX-WCeoAVIChb6NAzoFcKCFChMsrixJvD7ZRbvcaC-ceXEzXYaoH4K5vaoRDsUyBHFkpIDPnsn3bnzovbvi0a2Gg=s660" - , "" + , "This is the title of the graphic" ) ] , Para [ Math DisplayMath "e=mc^2" ] @@ -802,11 +823,14 @@ Pandoc [ Para [ Str "Abstract" , Space , Str "text" ] , Para [ Image - ( "" , [] , [] ) - [] - ( "https://lh3.googleusercontent.com/dB7iirJ3ncQaVMBGE2YX-WCeoAVIChb6NAzoFcKCFChMsrixJvD7ZRbvcaC-ceXEzXYaoH4K5vaoRDsUyBHFkpIDPnsn3bnzovbvi0a2Gg=s660" - , "" - ) + ( "graphic003" , [] , [] ) + [ Str "Alternative" + , Space + , Str "text" + , Space + , Str "1" + ] + ( "Title 3" , "" ) ] , Para [ Math DisplayMath "e=mc^2" ] ] @@ -821,6 +845,12 @@ Pandoc , Space , Str "because\8230" ] + , Figure + ( "fig-1" , [] , [] ) + (Caption Nothing [ Plain [ Str "bar" ] ]) + [ Plain [ Str "alternative-decription" ] + , Para [ Image ( "" , [] , [] ) [] ( "foo.png" , "" ) ] + ] , Header 1 ( "code-blocks" , [] , [] ) diff --git a/test/jats-reader.xml b/test/jats-reader.xml index 3fbf98d9dc10..ce8708ba8fb2 100644 --- a/test/jats-reader.xml +++ b/test/jats-reader.xml @@ -193,19 +193,26 @@ Formulae with miscelaneus elements

Regardless of whether or not they are inside a paragraph, should be wrapped in a div, and displayed in a block:

Inside a paragraph:

-

Einstein showed that +

Einstein definitely showed that

Abstract text

- - Alternative text 1 -

Google doodle from 14 March 2003

+ + Alternative text of the graphic + + This is the title of the caption +

Google doodle from 14 March 2003

e=mc2 - + Alternative text 2

Google doodle from 14 March 2003

@@ -218,14 +225,18 @@

Abstract text

- + Alternative text 1

Google doodle from 14 March 2003

e=mc2 - + Alternative text 2

Google doodle from 14 March 2003

@@ -234,6 +245,15 @@

This formula is important because…

+ + + +

bar

+ + alternative-decription + +
+
Code Blocks

Code: