Skip to content

Commit

Permalink
Move CR filtering from tabFilter to the readers.
Browse files Browse the repository at this point in the history
The readers previously assumed that CRs had been filtered
from the input.  Now we strip the CRs in the readers themselves,
before parsing.  (The point of this is just to simplify the
parsers.)

`Text.Pandoc.Shared` now exports a new function, `crFilter`. [API change]
`tabFilter` no longer filters CRs.
  • Loading branch information
jgm committed Jun 20, 2017
1 parent 4ba5ef4 commit 2363e6a
Show file tree
Hide file tree
Showing 17 changed files with 115 additions and 104 deletions.
4 changes: 0 additions & 4 deletions src/Text/Pandoc.hs
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,6 @@ inline links:
> main = do
> T.getContents >>= mdToRST >>= T.putStrLn
Note: all of the readers assume that the input text has @'\n'@
line endings. So if you get your input text from a web form,
you should remove @'\r'@ characters using @filter (/='\r')@.
-}

module Text.Pandoc
Expand Down
4 changes: 2 additions & 2 deletions src/Text/Pandoc/App.hs
Original file line number Diff line number Diff line change
Expand Up @@ -381,8 +381,8 @@ convertWithOpts opts = do
| otherwise -> []

let convertTabs = tabFilter (if optPreserveTabs opts || readerName == "t2t"
then 0
else optTabStop opts)
then 0
else optTabStop opts)

readSources :: [FilePath] -> PandocIO Text
readSources srcs = convertTabs . T.intercalate (T.pack "\n") <$>
Expand Down
5 changes: 3 additions & 2 deletions src/Text/Pandoc/Readers/DocBook.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module Text.Pandoc.Readers.DocBook ( readDocBook ) where
import Data.Char (toUpper)
import Text.Pandoc.Shared (safeRead)
import Text.Pandoc.Shared (safeRead, crFilter)
import Text.Pandoc.Options
import Text.Pandoc.Definition
import Text.Pandoc.Builder
Expand Down Expand Up @@ -526,7 +526,8 @@ instance Default DBState where

readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readDocBook _ inp = do
let tree = normalizeTree . parseXML . handleInstructions $ T.unpack inp
let tree = normalizeTree . parseXML . handleInstructions
$ T.unpack $ crFilter inp
(bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock $ tree
return $ Pandoc (dbMeta st') (toList . mconcat $ bs)

Expand Down
4 changes: 2 additions & 2 deletions src/Text/Pandoc/Readers/HTML.hs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ import Text.Pandoc.Definition
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..))
import Text.Pandoc.Shared ( extractSpaces, addMetaField
, escapeURI, safeRead )
, escapeURI, safeRead, crFilter )
import Text.Pandoc.Options (ReaderOptions(readerExtensions), extensionEnabled,
Extension (Ext_epub_html_exts,
Ext_raw_html, Ext_native_divs, Ext_native_spans))
Expand Down Expand Up @@ -82,7 +82,7 @@ readHtml :: PandocMonad m
readHtml opts inp = do
let tags = stripPrefixes . canonicalizeTags $
parseTagsOptions parseOptions{ optTagPosition = True }
inp
(crFilter inp)
parseDoc = do
blocks <- (fixPlains False) . mconcat <$> manyTill block eof
meta <- stateMeta . parserState <$> getState
Expand Down
4 changes: 2 additions & 2 deletions src/Text/Pandoc/Readers/Haddock.hs
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,15 @@ import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Error
import Text.Pandoc.Options
import Text.Pandoc.Shared (splitBy, trim)
import Text.Pandoc.Shared (splitBy, trim, crFilter)


-- | Parse Haddock markup and return a 'Pandoc' document.
readHaddock :: PandocMonad m
=> ReaderOptions
-> Text
-> m Pandoc
readHaddock opts s = case readHaddockEither opts (unpack s) of
readHaddock opts s = case readHaddockEither opts (unpack (crFilter s)) of
Right result -> return result
Left e -> throwError e

Expand Down
3 changes: 2 additions & 1 deletion src/Text/Pandoc/Readers/LaTeX.hs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ readLaTeX :: PandocMonad m
-> Text -- ^ String to parse (assumes @'\n'@ line endings)
-> m Pandoc
readLaTeX opts ltx = do
parsed <- readWithM parseLaTeX def{ stateOptions = opts } (unpack ltx)
parsed <- readWithM parseLaTeX def{ stateOptions = opts }
(unpack (crFilter ltx))
case parsed of
Right result -> return result
Left e -> throwError e
Expand Down
2 changes: 1 addition & 1 deletion src/Text/Pandoc/Readers/Markdown.hs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ readMarkdown :: PandocMonad m
-> m Pandoc
readMarkdown opts s = do
parsed <- (readWithM parseMarkdown) def{ stateOptions = opts }
(T.unpack s ++ "\n\n")
(T.unpack (crFilter s) ++ "\n\n")
case parsed of
Right result -> return result
Left e -> throwError e
Expand Down
5 changes: 3 additions & 2 deletions src/Text/Pandoc/Readers/MediaWiki.hs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (nested)
import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag)
import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines, trim)
import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines, trim,
crFilter)
import Text.Pandoc.Walk (walk)
import Text.Pandoc.XML (fromEntities)

Expand All @@ -77,7 +78,7 @@ readMediaWiki opts s = do
, mwLogMessages = []
, mwInTT = False
}
(unpack s ++ "\n")
(unpack (crFilter s) ++ "\n")
case parsed of
Right result -> return result
Left e -> throwError e
Expand Down
3 changes: 2 additions & 1 deletion src/Text/Pandoc/Readers/Muse.hs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ import Text.Pandoc.Class (PandocMonad(..))
import Text.Pandoc.Definition
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Shared (crFilter)
import Text.Pandoc.Parsing hiding (macro, nested)
import Text.Pandoc.Readers.HTML (htmlTag)
import Text.Pandoc.XML (fromEntities)
Expand All @@ -68,7 +69,7 @@ readMuse :: PandocMonad m
-> Text
-> m Pandoc
readMuse opts s = do
res <- readWithM parseMuse def{ stateOptions = opts } (unpack s)
res <- readWithM parseMuse def{ stateOptions = opts } (unpack (crFilter s))
case res of
Left e -> throwError e
Right d -> return d
Expand Down
4 changes: 3 additions & 1 deletion src/Text/Pandoc/Readers/OPML.hs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import Text.HTML.TagSoup.Entity (lookupEntity)
import Text.Pandoc.Builder
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Options
import Text.Pandoc.Shared (crFilter)
import Text.Pandoc.Readers.HTML (readHtml)
import Text.Pandoc.Readers.Markdown (readMarkdown)
import Text.XML.Light
Expand All @@ -32,7 +33,8 @@ instance Default OPMLState where
readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readOPML _ inp = do
(bs, st') <- flip runStateT def
(mapM parseBlock $ normalizeTree $ parseXML (unpack inp))
(mapM parseBlock $ normalizeTree $
parseXML (unpack (crFilter inp)))
return $
setTitle (opmlDocTitle st') $
setAuthors (opmlDocAuthors st') $
Expand Down
3 changes: 2 additions & 1 deletion src/Text/Pandoc/Readers/Org.hs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import Text.Pandoc.Definition
import Text.Pandoc.Error
import Text.Pandoc.Options
import Text.Pandoc.Parsing (reportLogMessages)
import Text.Pandoc.Shared (crFilter)

import Control.Monad.Except (throwError)
import Control.Monad.Reader (runReaderT)
Expand All @@ -51,7 +52,7 @@ readOrg :: PandocMonad m
readOrg opts s = do
parsed <- flip runReaderT def $
readWithM parseOrg (optionsToParserState opts)
(T.unpack s ++ "\n\n")
(T.unpack (crFilter s) ++ "\n\n")
case parsed of
Right result -> return result
Left _ -> throwError $ PandocParseError "problem parsing org"
Expand Down
2 changes: 1 addition & 1 deletion src/Text/Pandoc/Readers/RST.hs
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ readRST :: PandocMonad m
-> m Pandoc
readRST opts s = do
parsed <- (readWithM parseRST) def{ stateOptions = opts }
(T.unpack s ++ "\n\n")
(T.unpack (crFilter s) ++ "\n\n")
case parsed of
Right result -> return result
Left e -> throwError e
Expand Down
3 changes: 2 additions & 1 deletion src/Text/Pandoc/Readers/TWiki.hs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (enclosed, macro, nested)
import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag)
import Text.Pandoc.XML (fromEntities)
import Text.Pandoc.Shared (crFilter)
import Data.Text (Text)
import qualified Data.Text as T

Expand All @@ -58,7 +59,7 @@ readTWiki :: PandocMonad m
-> m Pandoc
readTWiki opts s = do
res <- readWithM parseTWiki def{ stateOptions = opts }
(T.unpack s ++ "\n\n")
(T.unpack (crFilter s) ++ "\n\n")
case res of
Left e -> throwError e
Right d -> return d
Expand Down
4 changes: 2 additions & 2 deletions src/Text/Pandoc/Readers/Textile.hs
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag)
import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline)
import Text.Pandoc.Shared (trim)
import Text.Pandoc.Shared (trim, crFilter)
import Data.Text (Text)
import qualified Data.Text as T

Expand All @@ -79,7 +79,7 @@ readTextile :: PandocMonad m
-> m Pandoc
readTextile opts s = do
parsed <- readWithM parseTextile def{ stateOptions = opts }
(T.unpack s ++ "\n\n")
(T.unpack (crFilter s) ++ "\n\n")
case parsed of
Right result -> return result
Left e -> throwError e
Expand Down
6 changes: 4 additions & 2 deletions src/Text/Pandoc/Readers/Txt2Tags.hs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (macro, space, spaces, uri)
import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI)
import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI, crFilter)
import Control.Monad (guard, void, when)
import Control.Monad.Reader (Reader, asks, runReader)
import Data.Default
Expand Down Expand Up @@ -95,7 +95,9 @@ readTxt2Tags :: PandocMonad m
-> m Pandoc
readTxt2Tags opts s = do
meta <- getT2TMeta
let parsed = flip runReader meta $ readWithM parseT2T (def {stateOptions = opts}) (T.unpack s ++ "\n\n")
let parsed = flip runReader meta $
readWithM parseT2T (def {stateOptions = opts}) $
T.unpack (crFilter s) ++ "\n\n"
case parsed of
Right result -> return $ result
Left e -> throwError e
Expand Down
Loading

0 comments on commit 2363e6a

Please sign in to comment.