Skip to content

Commit

Permalink
Add rowspan, colspan and alignment to cells in jats table reader (#8726)
Browse files Browse the repository at this point in the history
Partially addresses #8408
  • Loading branch information
noahmalmed committed Apr 5, 2023
1 parent 9f718da commit 0353e11
Show file tree
Hide file tree
Showing 3 changed files with 164 additions and 16 deletions.
46 changes: 30 additions & 16 deletions src/Text/Pandoc/Readers/JATS.hs
Expand Up @@ -268,26 +268,29 @@ parseBlock (Elem e) =
Just c -> filterChildren isColspec c
_ -> filterChildren isColspec e'
let isRow x = named "row" x || named "tr" x
headrows <- case filterChild (named "thead") e' of
Just h -> case filterChild isRow h of
Just x -> parseRow x
Nothing -> return []
Nothing -> return []
bodyrows <- case filterChild (named "tbody") e' of
Just b -> mapM parseRow
$ filterChildren isRow b
Nothing -> mapM parseRow
$ filterChildren isRow e'

-- list of header cell elements
let headRowElements = case filterChild (named "thead") e' of
Just h -> maybe [] parseElement (filterChild isRow h)
Nothing -> []
-- list of list of body cell elements
let bodyRowElements = case filterChild (named "tbody") e' of
Just b -> map parseElement $ filterChildren isRow b
Nothing -> map parseElement $ filterChildren isRow e'
let toAlignment c = case findAttr (unqual "align") c of
Just "left" -> AlignLeft
Just "right" -> AlignRight
Just "center" -> AlignCenter
_ -> AlignDefault
let toColSpan element = fromMaybe 1 $
findAttr (unqual "colspan") element >>= safeRead
let toRowSpan element = fromMaybe 1 $
findAttr (unqual "rowspan") element >>= safeRead
let toWidth c = do
w <- findAttr (unqual "colwidth") c
n <- safeRead $ "0" <> T.filter (\x -> isDigit x || x == '.') w
if n > 0 then Just n else Nothing
let numrows = foldl' max 0 $ map length bodyrows
let numrows = foldl' max 0 $ map length bodyRowElements
let aligns = case colspecs of
[] -> replicate numrows AlignDefault
cs -> map toAlignment cs
Expand All @@ -298,15 +301,26 @@ parseBlock (Elem e) =
Just ws' -> let tot = sum ws'
in ColWidth . (/ tot) <$> ws'
Nothing -> replicate numrows ColWidthDefault
let toRow = Row nullAttr . map simpleCell
toHeaderRow l = [toRow l | not (null l)]

let parseCell = parseMixed plain . elContent
let elementToCell element = cell
(toAlignment element)
(RowSpan $ toRowSpan element)
(ColSpan $ toColSpan element)
<$> (parseCell element)
let rowElementsToCells elements = mapM elementToCell elements
let toRow = fmap (Row nullAttr) . rowElementsToCells
toHeaderRow element = sequence $ [toRow element | not (null element)]

headerRow <- toHeaderRow headRowElements
bodyRows <- mapM toRow bodyRowElements
return $ table (simpleCaption $ plain capt)
(zip aligns widths)
(TableHead nullAttr $ toHeaderRow headrows)
[TableBody nullAttr 0 [] $ map toRow bodyrows]
(TableHead nullAttr headerRow)
[TableBody nullAttr 0 [] bodyRows]
(TableFoot nullAttr [])
isEntry x = named "entry" x || named "td" x || named "th" x
parseRow = mapM (parseMixed plain . elContent) . filterChildren isEntry
parseElement = filterChildren isEntry
sect n = do isbook <- gets jatsBook
let n' = if isbook || n == 0 then n + 1 else n
labelText <- case filterChild (named "label") e of
Expand Down
90 changes: 90 additions & 0 deletions test/jats-reader.native
Expand Up @@ -2792,6 +2792,96 @@ Pandoc
]
]
(TableFoot ( "" , [] , [] ) [])
, Header
2
( "table-with-spans-and-alignments" , [] , [] )
[ Str "Tables"
, Space
, Str "with"
, Space
, Str "spans"
, Space
, Str "and"
, Space
, Str "alignments"
]
, Table
( "" , [] , [] )
(Caption Nothing [])
[ ( AlignLeft , ColWidthDefault )
, ( AlignLeft , ColWidthDefault )
, ( AlignLeft , ColWidthDefault )
]
(TableHead
( "" , [] , [] )
[ Row
( "" , [] , [] )
[ Cell
( "" , [] , [] )
AlignDefault
(RowSpan 1)
(ColSpan 2)
[ Para [ Str "1" ] ]
, Cell
( "" , [] , [] )
AlignRight
(RowSpan 1)
(ColSpan 1)
[ Para [ Str "2" ] ]
]
])
[ TableBody
( "" , [] , [] )
(RowHeadColumns 0)
[]
[ Row
( "" , [] , [] )
[ Cell
( "" , [] , [] )
AlignDefault
(RowSpan 1)
(ColSpan 2)
[ Para [ Str "1" ] ]
, Cell
( "" , [] , [] )
AlignLeft
(RowSpan 1)
(ColSpan 1)
[ Para [ Str "2" ] ]
]
, Row
( "" , [] , [] )
[ Cell
( "" , [] , [] )
AlignDefault
(RowSpan 2)
(ColSpan 1)
[ Para [ Str "4" ] ]
, Cell
( "" , [] , [] )
AlignDefault
(RowSpan 1)
(ColSpan 1)
[ Para [ Str "5" ] ]
, Cell
( "" , [] , [] )
AlignDefault
(RowSpan 1)
(ColSpan 1)
[ Para [ Str "6" ] ]
]
, Row
( "" , [] , [] )
[ Cell
( "" , [] , [] )
AlignDefault
(RowSpan 1)
(ColSpan 2)
[ Para [ Str "7" ] ]
]
]
]
(TableFoot ( "" , [] , [] ) [])
, Header
2
( "empty-tables" , [] , [] )
Expand Down
44 changes: 44 additions & 0 deletions test/jats-reader.xml
Expand Up @@ -1150,6 +1150,50 @@ These should not be escaped: \$ \\ \&gt; \[ \{</preformat>
</tbody>
</table>
</sec>
<sec id="table-with-spans-and-alignments">
<title>Tables with spans and alignments</title>
<table>
<col align="left" />
<col align="left" />
<col align="left" />
<thead>
<tr>
<td colspan="2">
<p>1</p>
</td>
<td align="right">
<p>2</p>
</td>
</tr>
</thead>
<tbody>
<tr>
<td colspan="2">
<p>1</p>
</td>
<td align="left">
<p>2</p>
</td>
</tr>
<tr>
<td rowspan="2">
<p>4</p>
</td>
<td>
<p>5</p>
</td>
<td>
<p>6</p>
</td>
</tr>
<tr>
<td colspan="2">
<p>7</p>
</td>
</tr>
</tbody>
</table>
</sec>
<sec id="empty-tables">
<title>Empty Tables</title>
<p>This section should be empty.</p>
Expand Down

0 comments on commit 0353e11

Please sign in to comment.