From ed1173ace6c545f5b98c2b84859db89a6b9e799d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 19 Nov 2015 20:18:06 -0800 Subject: [PATCH] Rationalized behavior of --no-tex-ligatures and --smart. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change makes `--no-tex-ligatures` affect the LaTeX reader as well as the LaTeX and ConTeXt writers. If it is used, the LaTeX reader will parse characters `` ` ``, `'`, and `-` literally, rather than parsing ligatures for quotation marks and dashes. And the LaTeX writer will print unicode quotation mark and dash characters literally, rather than converting them to the standard ASCII ligatures. Note that `--smart` has no effect on the LaTeX reader. `--smart` is still the default for all input formats when LaTeX or ConTeXt is the output format, *unless* `--no-tex-ligatures` is used. Some examples to illustrate the logic: ``` % echo "'hi'" | pandoc -t latex `hi' % echo "'hi'" | pandoc -t latex --no-tex-ligatures 'hi' % echo "'hi'" | pandoc -t latex --no-tex-ligatures --smart ‘hi’ % echo "'hi'" | pandoc -f latex --no-tex-ligatures

'hi'

% echo "'hi'" | pandoc -f latex

’hi’

``` Closes #2541. --- README | 24 ++++++++++++++---------- pandoc.hs | 9 +++++++-- src/Text/Pandoc/Readers/LaTeX.hs | 24 +++++++++++++++--------- 3 files changed, 36 insertions(+), 21 deletions(-) diff --git a/README b/README index 192415199cd5..50ec125961e8 100644 --- a/README +++ b/README @@ -374,7 +374,7 @@ Reader options `...` to ellipses. Nonbreaking spaces are inserted after certain abbreviations, such as "Mr." (Note: This option is selected automatically when the output format is `latex` or `context`, unless `--no-tex-ligatures` - is used.) + is used. It has no effect for `latex` input.) `--old-dashes` @@ -651,15 +651,19 @@ Options affecting specific writers `--no-tex-ligatures` -: Do not convert quotation marks, apostrophes, and dashes to - the TeX ligatures when writing LaTeX or ConTeXt. Instead, just - use literal unicode characters. This is needed for using advanced - OpenType features with `xelatex` and `lualatex`. Note: normally - `--smart` is selected automatically for LaTeX and ConTeXt - output, but it must be specified explicitly if `--no-tex-ligatures` - is selected. If you use literal curly quotes, dashes, and ellipses - in your source, then you may want to use `--no-tex-ligatures` - without `--smart`. +: Do not use the TeX ligatures for quotation marks, apostrophes, + and dashes (`` `...' ``, ` ``..'' `, `--`, `---`) when + writing or reading LaTeX or ConTeXt. In reading LaTeX, + parse the characters `` ` ``, `'`, and `-` literally, rather + than parsing ligatures for quotation marks and dashes. In + writing LaTeX or ConTeXt, print unicode quotation mark and + dash characters literally, rather than converting them to + the standard ASCII TeX ligatures. Note: normally `--smart` + is selected automatically for LaTeX and ConTeXt output, but + it must be specified explicitly if `--no-tex-ligatures` is + selected. If you use literal curly quotes, dashes, and + ellipses in your source, then you may want to use + `--no-tex-ligatures` without `--smart`. 
`--listings` diff --git a/pandoc.hs b/pandoc.hs index edcaa4bb9adc..2353ebb119ed 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -1176,6 +1176,9 @@ main = do "beamer" `isPrefixOf` writerName' let conTeXtOutput = "context" `isPrefixOf` writerName' + let laTeXInput = "latex" `isPrefixOf` readerName' || + "beamer" `isPrefixOf` readerName' + writer <- if ".lua" `isSuffixOf` writerName' -- note: use non-lowercased version writerName then return $ IOStringWriter $ writeCustom writerName @@ -1257,8 +1260,10 @@ main = do uriFragment = "" } _ -> Nothing - let readerOpts = def{ readerSmart = smart || (texLigatures && - (laTeXOutput || conTeXtOutput)) + let readerOpts = def{ readerSmart = if laTeXInput + then texLigatures + else smart || (texLigatures && + (laTeXOutput || conTeXtOutput)) , readerStandalone = standalone' , readerParseRaw = parseRaw , readerColumns = columns diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index a6db6ffad44f..b9645d034e9d 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -169,17 +169,23 @@ quoted' f starter ender = do try ((f . 
mconcat) <$> manyTill inline ender) <|> lit startchs doubleQuote :: LP Inlines -doubleQuote = - quoted' doubleQuoted (try $ string "``") (void $ try $ string "''") - <|> quoted' doubleQuoted (string "“") (void $ char '”') - -- the following is used by babel for localized quotes: - <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'") - <|> quoted' doubleQuoted (string "\"") (void $ char '"') +doubleQuote = do + smart <- getOption readerSmart + if smart + then quoted' doubleQuoted (try $ string "``") (void $ try $ string "''") + <|> quoted' doubleQuoted (string "“") (void $ char '”') + -- the following is used by babel for localized quotes: + <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'") + <|> quoted' doubleQuoted (string "\"") (void $ char '"') + else str <$> many1 (oneOf "`'“”\"") singleQuote :: LP Inlines -singleQuote = - quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter) - <|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter) +singleQuote = do + smart <- getOption readerSmart + if smart + then quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter) + <|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter) + else str <$> many1 (oneOf "`\'‘’") inline :: LP Inlines inline = (mempty <$ comment)