-
Notifications
You must be signed in to change notification settings - Fork 4
/
Markdown.hs
380 lines (333 loc) · 20.4 KB
/
Markdown.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
{-# OPTIONS_GHC -fglasgow-exts #-}
{- Markdown.hs - Haskell implementation of markdown using PEG grammar.
(c) 2008 John MacFarlane
Released under the GPL
-}
import Text.Parsers.Frisby
import Text.Parsers.Frisby.Char
import Text.Pandoc.XML
import Data.Char (toUpper)
import Text.PrettyPrint.HughesPJ hiding (text, char, (<>), empty)
import qualified Text.PrettyPrint.HughesPJ as P (text, char, (<>), empty)
import System.Environment
-- Uncomment the following two lines for UTF8 support (requires utf8-string library):
-- import System.IO.UTF8
-- import Prelude hiding (getContents, putStrLn, readFile)
main :: IO ()
main = do
argv <- getArgs
c <- if null argv
then getContents
else mapM readFile argv >>= return . unlines
let (blocks, remaining) = runPeg doc $ tabFilter tabStop (c ++ "\n")
if (not . null) remaining
then error $ "Parse failed at: " ++ take 35 remaining
else do
-- extract link references first, then convert to HTML
let refs = map (\(Reference l (Src (s,t))) -> (l, (s,t))) $ filter isReference blocks
putStrLn $ render $ vcat $ map (blockToHtml refs) blocks
-- | A link target: a URL and title, or a reference, or nothing.
data Target = Src (String, String) -- ^ (URL, title)
| Ref [Inline] String -- ^ the string contains spaces before the reference: e.g. " " in "[label] [ref]"
| Null -- ^ this is used for shortcut references: "[label]"
deriving (Show, Read, Eq)
-- | An inline (span-level) element.
data Inline =
Text String
| Entity String -- ^ an entity reference without the '&' and ';'
| Space
| LineBreak
| Emph [Inline]
| Strong [Inline]
| Code String
| Link [Inline] Target
| Image [Inline] Target
| Html String -- ^ raw HTML
deriving (Show, Read, Eq)
-- | A block element.
data Block =
Para [Inline]
| Plain [Inline]
| Heading Int [Inline]
| BlockQuote [Block]
| BulletList [[Block]]
| OrderedList [[Block]]
| HtmlBlock String
| Verbatim String
| HorizontalRule
| Reference [Inline] Target -- ^ a link reference: e.g. [label]: /url "title"
| Markdown String -- ^ unprocessed markdown, to be parsed into blocks
deriving (Show, Read, Eq)
{- Reading the grammar:
Standard PEG Frisby
------------ -----------------------
A* many A
A+ many1 A
A? optional A (returns () if no match)
option B A (same thing but returns B if no match)
!A doesNotMatch A
&A peek A
A / B A // B
[abc] oneOf "abc"
[^abc] noneOf "abc"
'a' char 'a'
"abc" text "abc"
A B A <> B -- parses A, then B, and returns a pair (A, B)
A ->> B -- parses A, then B, and returns A
A <<- B -- parses A, then B, and returns B
A <++> B -- parses A, then B, and returns concatenation of A and B
eof eof
A <- B A <- newRule $ B
Other Frisby peculiarities:
X ## f -- parses X and passes the result through function f
X ##> a -- parses X and returns constant a
-}
-- | The PEG grammar definition for Markdown.
doc :: forall s . PM s (P s ([Block], String))
doc = mdo
-- characters and tokens
spaceChar <- newRule $ oneOf " \t"
newline <- newRule $ text "\n"
sp <- newRule $ many spaceChar
spnl <- newRule $ sp <++> option "" (newline <++> sp)
specialChar <- newRule $ oneOf "*_`&[]<!\\"
normalChar <- newRule $ escapedChar // (doesNotMatch (specialChar // spaceChar) ->> doesNotMatch newline ->> anyChar)
escapedChar <- newRule $ char '\\' ->> anyChar
-- strings
nonindentSpace <- newRule $ option "" (text " " // text " " // text " ")
indent <- newRule $ text "\t" // text " "
indentedLine <- newRule $ indent ->> anyline
optionallyIndentedLine <- newRule $ indent ->> anyline
anyline <- newRule $ many (doesNotMatch newline ->> doesNotMatch eof ->> anyChar) <++> option "" newline
blankline <- newRule $ sp <++> newline
blockquoteLine <- newRule $ nonindentSpace ->> char '>' ->> optional (char ' ') ->> anyline
quoted <- newRule $ (text "\"" <++> many (noneOf "\"") <++> text "\"") //
(text "'" <++> many (noneOf "'") <++> text "'")
htmlAttribute <- newRule $ many1 alphaNum <++> spnl <++> option "" (text "=" <++> spnl <++>
(quoted // many1 (doesNotMatch spaces ->> anyChar))) <++> spnl
htmlComment <- newRule $ text "<!--" <++> many (doesNotMatch (text "-->") ->> anyChar) <++> text "-->"
htmlTag <- newRule $ text "<" <++> spnl <++> option "" (text "/") <++> many1 alphaNum <++>
spnl <++> (many htmlAttribute ## concat) <++> option "" (text "/") <++> text ">"
-- inlines
inline <- newRule $ strong // emph // code // endline // spaces // link // image // autolink //
rawHtml // str // entity // special
emph <- newRule $ emphStar // emphUl
oneStar <- newRule $ char '*' <<- doesNotMatch oneStar
emphStar <- newRule $ oneStar ->> doesNotMatch spaceChar ->> doesNotMatch newline ->>
many1 (strong // (doesNotMatch (spnl ->> oneStar) ->> inline)) <<- oneStar ## Emph
oneUl <- newRule $ char '_' <<- doesNotMatch oneUl
emphUl <- newRule $ oneUl ->> doesNotMatch spaceChar ->> doesNotMatch newline ->>
many1 (strong // (doesNotMatch (spnl ->> oneUl) ->> inline)) <<- oneUl <<-
doesNotMatch alphaNum ## Emph
strong <- newRule $ strongStar // strongUl
twoStar <- newRule $ text "**" <<- doesNotMatch twoStar
twoUl <- newRule $ text "__" <<- doesNotMatch twoStar
strongStar <- newRule $ twoStar ->> doesNotMatch spaceChar ->> doesNotMatch newline ->>
many1 (doesNotMatch (spnl ->> twoStar) ->> inline) <<- twoStar ## Strong
strongUl <- newRule $ twoUl ->> doesNotMatch spaceChar ->> doesNotMatch newline ->>
many1 (doesNotMatch (spnl ->> twoUl) ->> inline) <<- twoUl <<-
doesNotMatch alphaNum ## Strong
let ticks n = text (replicate n '`') <<- doesNotMatch (char '`')
let betweenTicks n = ticks n ->> many1 (many1 (noneOf "`") // doesNotMatch (ticks n) ->> many1 (char '`')) <<-
ticks n ## lrstrip ' ' . concat
code <- newRule $ peek (char '`') ->> choice (map betweenTicks $ reverse [1..10]) ## Code
rawHtml <- newRule $ (htmlComment // htmlTag) ## Html
str <- newRule $ many1 normalChar ## Text
special <- newRule $ specialChar ## Text . (: [])
spaces <- newRule $ many1 spaceChar ##> Space
endline <- newRule $ optional (text " ") <> newline <> doesNotMatch blankline <> doesNotMatch eof ##> Space
linebreak <- newRule $ text " " ->> sp ->> endline ##> LineBreak
entity <- newRule $ char '&' ->> (text "#" <++> (text "x" // text "X") <++> many1 hexDigit //
text "#" <++> many1 digit // many1 alphaNum) <<- char ';' ## Entity
label <- newRule $ char '[' ->> many (doesNotMatch (char ']') ->> inline) <<- char ']'
title <- newRule $ char '"' ->> many (doesNotMatch (char '"' <> sp <> (text ")" // newline)) ->>
doesNotMatch newline ->> anyChar) <<- char '"' //
char '\'' ->> many (doesNotMatch (char '\'' <> sp <> (text ")" // newline)) ->>
doesNotMatch newline ->> anyChar) <<- char '\''
source <- newRule $ char '<' ->> source' <<- char '>' // source'
source' <- newRule $ many (many1 (noneOf "()> \n\t") // (text "(" <++> source' <++> text ")") //
(text "<" <++> source' <++> text ">")) ## concat
sourceAndTitle <- newRule $ char '(' ->> sp ->> source <<- spnl <> option "" title <<- sp <<- char ')'
explicitLink <- newRule $ label <> spnl ->> sourceAndTitle ## (\(l, s) -> Link l (Src s))
autolinkUrl <- newRule $ char '<' ->> many1 alpha <++> text "://" <++>
many1 (doesNotMatch newline ->> doesNotMatch (char '>') ->> anyChar) <<- char '>' ##
(\s -> Link [Text s] (Src (s, "")))
autolinkEmail <- newRule $ char '<' ->> many1 alpha <++> text "@" <++>
many1 (doesNotMatch newline ->> doesNotMatch (char '>') ->> anyChar) <<- char '>' ##
(\s -> Link [Text s] (Src ("mailto:" ++ s, "")))
autolink <- newRule $ autolinkUrl // autolinkEmail
referenceLink <- newRule $ label <> spnl <> label ## (\((l1,s), l2) -> Link l1 (Ref l2 s)) //
label ## (\l -> Link l Null)
link <- newRule $ explicitLink // referenceLink
image <- newRule $ char '!' ->> link ## (\(Link x y) -> Image x y)
-- blocks
block <- newRule $ blockquote // verbatim // reference // htmlBlock // heading // list // horizontalRule //
para // plain
para <- newRule $ many1 inline <<- newline <<- many1 blankline ## Para
plain <- newRule $ many1 inline <<- optional blankline ## Plain
blockquote <- newRule $ many1 (many1 blockquoteLine <++> many (doesNotMatch blankline ->> anyline) <++>
many blankline ## concat) ## (\ls -> BlockQuote [Markdown $ concat ls ++ "\n"])
let setextHeadingWith lev c = many1 (doesNotMatch endline ->> inline) <<- newline <<-
text (replicate 3 c) <<- (many (char c)) <<- newline ## Heading lev
setextHeading <- newRule $ setextHeadingWith 1 '=' // setextHeadingWith 2 '-'
let atxHeadingFor lev = text (replicate lev '#') ->>
many1 (doesNotMatch endline ->> doesNotMatch (char '#') ->> inline) <<-
many (oneOf "# \t") <<- newline ## Heading lev
atxHeading <- newRule $ choice $ map atxHeadingFor [6, 5..1]
heading <- newRule $ (atxHeading // setextHeading) <<- many blankline
let horizontalRuleWith c = nonindentSpace ->> char c ->> sp ->> char c ->> sp ->> char c ->>
many (sp ->> char c) ->> sp ->> newline ->> many1 blankline ##> HorizontalRule
horizontalRule <- newRule $ choice $ map horizontalRuleWith ['*', '_', '-']
verbatim <- newRule $ many1 (doesNotMatch blankline ->> indentedLine) <++>
(many (many1 (optional indent ->> blankline) <++>
many1 (doesNotMatch blankline ->> indentedLine)) ## concat) <<-
many blankline ## Verbatim . concat
list <- newRule $ bulletList // orderedList
bullet <- newRule $ nonindentSpace ->> oneOf "+*-" <<- many1 spaceChar ## (: [])
bulletList <- newRule $ bulletListTight // bulletListLoose
bulletListTight <- newRule $ many1 (bulletListItem ## (\s -> [Markdown s])) <<- many blankline <<-
doesNotMatch bulletListLoose ## BulletList
bulletListLoose <- newRule $ many1 ((bulletListItem <<- many blankline) ## (\s -> [Markdown $ s ++ "\n\n"])) ##
BulletList
bulletListItem <- newRule $ doesNotMatch horizontalRule ->> bullet ->>
listBlock <++> (many listContinuationBlock ## concat)
listBlock <- newRule $ anyline <++> (many (doesNotMatch (optional indent ->> (bulletListItem // orderedListItem))
->> doesNotMatch blankline ->> doesNotMatch (indent ->> (bullet // enumerator)) ->>
optionallyIndentedLine) ## concat)
listContinuationBlock <- newRule $ ((many1 blankline ## concat) // unit "\0") <++>
(many1 (indent ->> listBlock) ## concat)
enumerator <- newRule $ nonindentSpace ->> many1 digit <<- char '.' <<- many1 spaceChar
orderedList <- newRule $ orderedListTight // orderedListLoose
orderedListTight <- newRule $ many1 (orderedListItem ## (\s -> [Markdown s])) <<- many blankline <<-
doesNotMatch orderedListLoose ## OrderedList
orderedListLoose <- newRule $ many1 ((orderedListItem <<- many blankline) ## (\s -> [Markdown $ s ++ "\n\n"])) ##
OrderedList
orderedListItem <- newRule $ enumerator ->> listBlock <++> (many listContinuationBlock ## concat)
let htmlBlockOpening tag = text "<" <++> spnl <++> text tag <++> spnl <++> (many htmlAttribute ## concat)
let htmlBlockSolo tag = htmlBlockOpening tag <++> text "/" <++> spnl <++> text ">"
let htmlBlockWithEnd tag = htmlBlockOpening tag <++> text ">" <++>
(many (doesNotMatch (htmlBlockEndFor tag) ->>
(htmlBlockAny // many1 (noneOf "<") // text "<")) ## concat) <++>
htmlBlockEndFor tag
let htmlBlockEndFor tag = text "<" <++> spnl <++> text "/" <++> text tag <++> spnl <++> text ">"
let htmlBlockFor tag = htmlBlockSolo tag // htmlBlockWithEnd tag
let blockTags = ["address", "blockquote", "center", "dir", "div", "dl", "fieldset", "form", "h1", "h2", "h3",
"h4", "h5", "h6", "hr", "isindex", "menu", "noframes", "noscript", "ol", "p", "pre", "table",
"ul", "dd", "dt", "frameset", "li", "tbody", "td", "tfoot", "th", "thead", "tr", "script"]
htmlBlockAny <- newRule $ choice $ map htmlBlockFor (blockTags ++ map (map toUpper) blockTags)
htmlBlock <- newRule $ nonindentSpace <++> (htmlComment // htmlBlockAny) ## HtmlBlock
-- references
reference <- newRule $ nonindentSpace ->> label <> char ':' ->> spnl ->>
many1 (doesNotMatch spaceChar ->> doesNotMatch newline ->> anyChar) <>
spnl ->> option "" title <<- many blankline ## (\((l,s),t) -> Reference l (Src (s,t)))
-- document - returns (block list, unparsed text)
document <- newRule $ (many (many blankline ->> block) <<- many blankline) <> rest <<- eof
return document
--
-- Convert inlines and blocks to HTML
--
-- | Convert inline element to HTML.
inlineToHtml :: [([Inline],(String, String))] -- ^ list of link references
-> Inline -- ^ inline element
-> Doc
inlineToHtml refs i =
case i of
Text s -> P.text (escapeStringForXML s)
Entity s -> P.char '&' P.<> P.text s P.<> P.char ';'
Space -> P.char ' '
LineBreak -> selfClosingTag "br" []
Code s -> inTagsSimple "code" $ P.text $ escapeStringForXML s
Emph xs -> inTagsSimple "em" $ hcat $ map (inlineToHtml refs) xs
Strong xs -> inTagsSimple "strong" $ hcat $ map (inlineToHtml refs) xs
Html s -> P.text s
-- an autolink <http://google.com> or [explicit link](google.com) with no title
Link l (Src (u,"")) -> inTags False "a" [("href", u)] $ hcat $ map (inlineToHtml refs) l
-- an explicit link with a title: [like this](google.com "title")
Link l (Src (u,t)) -> inTags False "a" [("href", u), ("title", t)] $ hcat $ map (inlineToHtml refs) l
-- a shortcut-style reference link: [like this]
Link l Null -> case lookup l refs of
Just (u, "") -> inTags False "a" [("href", u)] $ hcat $ map (inlineToHtml refs) l
Just (u, t) -> inTags False "a" [("href", u), ("title", t)] $ hcat $
map (inlineToHtml refs) l
Nothing -> hcat $ map (inlineToHtml refs) $ [Text "["] ++ l ++ [Text "]"]
-- a regular reference link: [like][this] or [like] [this]
Link l (Ref r s) -> let r' = if null r then l else r
in case lookup r' refs of
Just (u, "") -> inTags False "a" [("href", u)] $ hcat $ map (inlineToHtml refs) l
Just (u, t) -> inTags False "a" [("href", u), ("title", t)] $ hcat $
map (inlineToHtml refs) l
Nothing -> hcat $ map (inlineToHtml refs) $ [Text "["] ++ l ++
[Text $ "]" ++ s ++ "["] ++ r ++ [Text "]"]
Image l (Src (u,t)) -> selfClosingTag "img" [("src", u), ("title", t),
("alt", render $ hcat $ map (inlineToHtml refs) l)]
-- a shortcut-style reference link: ![like this]
Image l Null -> case lookup l refs of
Just (u, t) -> inlineToHtml refs $ Image l (Src (u,t))
Nothing -> hcat $ map (inlineToHtml refs) $ [Text "!["] ++ l ++ [Text "]"]
-- a regular reference link: ![like][this] or ![like] [this]
Image l (Ref r s) -> let r' = if null r then l else r
in case lookup r' refs of
Just (u, t) -> inlineToHtml refs $ Image l (Src (u, t))
Nothing -> hcat $ map (inlineToHtml refs) $ [Text "!["] ++ l ++
[Text $ "]" ++ s ++ "["] ++ r ++ [Text "]"]
-- | Convert block element to HTML.
blockToHtml :: [([Inline],(String, String))] -- ^ list of link references
-> Block -- ^ block element to convert
-> Doc
blockToHtml refs block =
case block of
Para xs -> inTagsSimple "p" $ wrap refs $ lrstrip Space xs
Plain xs -> wrap refs $ lrstrip Space xs
Heading lev xs -> inTagsSimple ("h" ++ show lev) $ hcat $ map (inlineToHtml refs) $ lrstrip Space xs
HorizontalRule -> selfClosingTag "hr" []
BlockQuote xs -> inTagsIndented "blockquote" $ vcat $ map (blockToHtml refs) xs
Verbatim s -> inTagsSimple "pre" $ inTagsSimple "code" $ P.text $ escapeStringForXML s
BulletList items -> inTagsIndented "ul" $ vcat $
map (\item -> inTagsSimple "li" $ vcat $ map (blockToHtml refs) item) items
OrderedList items -> inTagsIndented "ol" $ vcat $
map (\item -> inTagsSimple "li" $ vcat $ map (blockToHtml refs) item) items
Markdown s -> -- handle a raw chunk of markdown, e.g. a list item or block quote contents
-- if the chunk contains \0, it is split at that point into two chunks that
-- are parsed separately. This allows correct handling of lists like:
-- - item
-- - sub
-- - sub
let (a, b) = break (=='\0') s
(parsed, remaining) = runPeg doc a
first = vcat $ map (blockToHtml refs) parsed
in if null remaining
then if null b
then first
else first $$ blockToHtml refs (Markdown $ tail b)
else error $ "Parse failed at: " ++ take 35 remaining
Reference _ _ -> P.empty
HtmlBlock s -> P.text s
--
-- Wrapping code and other utilities from pandoc
--
-- | Take list of inline elements and return wrapped doc.
wrap :: [([Inline],(String, String))] -> [Inline] -> Doc
wrap refs lst = fsep $ map (hcat . map (inlineToHtml refs)) (splitBy Space lst)
-- | Split list by groups of one or more sep.
splitBy :: (Eq a) => a -> [a] -> [[a]]
splitBy _ [] = []
splitBy s lst =
let (x, xs) = break (== s) lst
xs' = dropWhile (== s) xs
in x:(splitBy s xs')
-- | Strip leading and trailing elements.
lrstrip :: (Eq a) => a -> [a] -> [a]
lrstrip x = reverse . dropWhile (== x) . reverse . dropWhile (== x)
tabStop :: Int
tabStop = 4
tabFilter :: Int -> String -> String
tabFilter _ [] = ""
tabFilter _ ('\r':'\n':xs) = '\n' : tabFilter tabStop xs
tabFilter _ ('\r':xs) = '\n' : tabFilter tabStop xs
tabFilter _ ('\n':xs) = '\n' : tabFilter tabStop xs
tabFilter spsToNextStop ('\t':xs) = replicate spsToNextStop ' ' ++ tabFilter tabStop xs
tabFilter 1 (x:xs) = x:(tabFilter tabStop xs)
tabFilter spsToNextStop (x:xs) = x:(tabFilter (spsToNextStop - 1) xs)
isReference :: Block -> Bool
isReference (Reference _ _) = True
isReference _ = False