Initial commit for release on github

0 parents commit fee675b43fd1b77fffa32e9ae852ba5fb080bad4 @chenkelmann committed
55 .gitignore
@@ -0,0 +1,55 @@
+# use glob syntax.
+syntax: glob
+*.ser
+*.class
+*~
+*.bak
+*.off
+*.old
+.DS_Store
+
+# logs
+derby.log
+
+# eclipse conf file
+.settings
+.classpath
+.project
+.manager
+.scala_dependencies
+
+# building
+target
+build/target
+null
+tmp*
+dist
+test-output
+
+# sbt
+target
+lib_managed
+src_managed
+project/boot
+
+# db
+lift_proto*
+
+# other scm
+.svn
+.CVS
+.hg*
+
+# switch to regexp syntax.
+# syntax: regexp
+# ^\.pc/
+
+# IntelliJ
+*.iml
+*.ipr
+*.iws
+.idea
+
+# Pax Runner (for easy OSGi launching)
+runner
+
29 LICENSE
@@ -0,0 +1,29 @@
+Actuarius
+Copyright © 2010, Christoph Henkelmann
+http://henkelmann.eu/
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+- Neither the name “Actuarius”, nor the names of its contributors
+may be used to endorse or promote products derived from this software without
+specific prior written permission.
+
+This software is provided by the copyright holders and contributors “as is” and
+any express or implied warranties, including, but not limited to, the implied
+warranties of merchantability and fitness for a particular purpose are disclaimed.
+In no event shall the copyright owner or contributors be liable for any direct,
+indirect, incidental, special, exemplary, or consequential damages
+(including, but not limited to, procurement of substitute goods or services;
+loss of use, data, or profits; or business interruption) however caused and on any
+theory of liability, whether in contract, strict liability, or tort
+(including negligence or otherwise) arising in any way out of the use of this software,
+even if advised of the possibility of such damage.
7 TODO
@@ -0,0 +1,7 @@
+- Decorator for whole output (default: <div>)
+- fast links (<foo>) evaluated too eagerly
+- simple java wrapper
+- speedup of br, em and strong
+- speedup by referencing (start, stop) indices when tokenizing lines
+ (CharSequence based on index pairs)
+- if xml is disabled, turn elements into plain text instead of turning them into links
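The "(start, stop) indices" item above suggests tokenizing lines without copying their contents. A minimal sketch of that idea, not part of the commit (class name invented), would be a CharSequence view defined by an index pair:

    // Sketch only: a copy-free CharSequence window over a larger buffer, defined by (start, stop).
    class IndexPairSequence(source: CharSequence, start: Int, stop: Int) extends CharSequence {
        def length: Int = stop - start                      // stop is exclusive
        def charAt(i: Int): Char = source.charAt(start + i)
        def subSequence(from: Int, to: Int): CharSequence =
            new IndexPairSequence(source, start + from, start + to)
        override def toString = source.subSequence(start, stop).toString
    }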
8 project/build.properties
@@ -0,0 +1,8 @@
+#Project properties
+#Sun Dec 12 12:54:53 CET 2010
+project.organization=eu.henkelmann
+project.name=actuarius
+sbt.version=0.7.7
+project.version=0.2.3
+build.scala.versions=2.9.1 2.9.0-1 2.8.1 2.8.0
+project.initialize=false
26 project/build/ActuariusProject.scala
@@ -0,0 +1,26 @@
+import sbt._
+import de.element34.sbteclipsify._
+import eu.henkelmann.sbt._
+
+class ActuariusProject(info: ProjectInfo) extends DefaultProject(info)
+ with IdeaProject with Eclipsify{
+
+ val scalatest = "org.scalatest" % "scalatest_2.9.0" % "1.4.1" % "test->default" withSources()
+ val junit = "junit" % "junit" % "4.8.2" % "test->default"
+
+ def junitXmlListener: TestReportListener = new JUnitXmlTestsListener(outputPath.toString)
+ override def testListeners: Seq[TestReportListener] = super.testListeners ++ Seq(junitXmlListener)
+
+ override def managedStyle = ManagedStyle.Maven
+ lazy val publishTo = Resolver.file("Development Repo", new java.io.File((Path.userHome /"srv"/"maven").toString))
+
+
+ // define additional artifacts
+ // create jar paths for javadoc and sources
+ override def packageDocsJar = defaultJarPath("-javadoc.jar")
+ override def packageSrcJar = defaultJarPath("-sources.jar")
+ val sourceArtifact = Artifact.sources(artifactID)
+ val docsArtifact = Artifact.javadoc(artifactID)
+
+ override def packageToPublishActions = super.packageToPublishActions ++ Seq(packageDocs, packageSrc)
+}
11 project/plugins/ActuariusPlugins.scala
@@ -0,0 +1,11 @@
+import sbt._
+
+class ActuariusPlugins(info: ProjectInfo) extends PluginDefinition(info) {
+ val ideaRepo = "GH-pages repo" at "http://mpeltonen.github.com/maven/"
+ lazy val idea = "com.github.mpeltonen" % "sbt-idea-plugin" % "0.1-SNAPSHOT"
+
+ val junitXmlRepo = "Christoph's Maven Repo" at "http://maven.henkelmann.eu/"
+ lazy val junitXml = "eu.henkelmann" % "junit_xml_listener" % "0.2"
+
+ lazy val eclipse = "de.element34" % "sbt-eclipsify" % "0.7.0"
+}
3 project/plugins/project/build.properties
@@ -0,0 +1,3 @@
+#Project properties
+#Thu Nov 24 10:35:01 CET 2011
+plugin.uptodate=true
252 src/main/scala/eu/henkelmann/actuarius/BaseParsers.scala
@@ -0,0 +1,252 @@
+package eu.henkelmann.actuarius
+
+import util.parsing.json.Parser
+import util.parsing.combinator.RegexParsers
+import collection.SortedMap
+
+
+/**
+ * Basic parsers for Markdown Source.
+ * Provides general, small parsers that are used by other parsers.
+ * Also contains parsers for XML elements.
+ */
+
+trait BaseParsers extends RegexParsers {
+
+ /////////////////////////////////////////
+ // Basic parsers used by other parsers //
+ /////////////////////////////////////////
+
+ /**
+     * Whitespace is sometimes significant in Markdown parsing,
+     * so we handle it manually; hence this returns false.
+ */
+ override def skipWhitespace = false
+
+ /** accepts one or more spaces or tabs
+ * returns the matched whitespace
+ */
+ def ws:Parser[String] = """( |\t|\v)+""".r
+
+ /** accepts zero or more spaces or tabs
+ * returns the matched whitespace
+ */
+ def ows:Parser[String] = Parser{in =>
+ if (in.atEnd) Success("test", in)
+ else {
+ var i = in.offset
+ val s = in.source
+ val end = s.length
+ //process chars as long as it is whitespace
+ while (i<end && (s.charAt(i)==' ' || s.charAt(i)=='\t')) {
+ //advance a char
+ i += 1
+ }
+ Success(s.subSequence(in.offset, i).toString, in.drop(i - in.offset))
+ }
+ }
+
+ /** Accepts a unix newline and returns a string containing a single newline.
+ */
+ def nl:Parser[String] = '\n' ^^^ {"\n"}
+
+ /**
+ * Matches everything in the parsed string up to the end.
+ * Also matches the empty String. Returns the matched String.
+ */
+ def rest:Parser[String] = Parser { in =>
+ if (in.atEnd) {
+ Success("", in)
+ } else {
+ val source = in.source
+ val offset = in.offset
+ Success(source.subSequence(offset, source.length).toString, in.drop(source.length-offset))
+ }
+ }
+
+ /**
+ * Matches exactly one char, no matter which.
+ * This differs from "elem" as it returns a string consisting of that char.
+ */
+ def any:Parser[String] = Parser{ in =>
+ if (in.atEnd) Failure("End of input reached", in)
+ else Success(in.first.toString, in.rest)
+ }
+
+ /**
+ * Matches one of the chars in the given set.
+ * Returns a string with the matched char.
+ */
+ def oneOf(lookup:Set[Char]):Parser[String] = Parser{ in =>
+ if (lookup.contains(in.first)) Success(in.first.toString, in.rest)
+ else Failure("Expected one of " + lookup + " but found '" + in.first + "'", in)
+ }
+
+ /**
+ * Matches one of the given char keys in the map.
+ * Returns the string value for the matched char in the given map.
+ */
+ def oneOf(lookup:Map[Char,String]):Parser[String] = Parser{ in =>
+ if (lookup.contains(in.first)) Success(lookup(in.first), in.rest)
+ else Failure("Expected one of " + lookup.keys + " but found '" + in.first + "'", in)
+ }
+
+ /**
+     * Checks whether the preceding char was one of the given chars.
+ * Never consumes any input.
+ */
+ def lookbehind(cs:Set[Char]):Parser[Unit] = Parser{ in =>
+ val source = in.source
+ val offset = in.offset
+ if (offset == 0) {
+ Failure("No chars before current char, cannot look behind.", in)
+ } else if (!cs.contains(source.charAt(offset-1))) {
+ Failure("Previous char was '" + source.charAt(offset-1) + "' expected one of " + cs, in)
+ } else {
+ Success((), in)
+ }
+ }
+
+ /**
+ * Returns a verbose description of a char (printed char & hex code).
+ * Used for debugging.
+ */
+ def verboseString(c:Char) = "'" + c + "'(\\u" + Integer.toHexString(c) + ")"
+
+ /**
+ * Matches one char in the given range, returns the matched char.
+ */
+ def range(begin:Char, end:Char):Parser[Char] = Parser{ in =>
+ val c = in.first
+ if (begin <= c && c <= end) Success(c, in.rest)
+ else Failure(verboseString(c) + " not in range " +
+ verboseString(begin) + " - " + verboseString(end),
+ in)
+ }
+
+ def ranges(rs:SortedMap[Char, Char]):Parser[Char] = Parser{ in =>
+ if (in.atEnd) Failure("End of input.", in)
+ else {
+ val c = in.first
+ val lower:SortedMap[Char,Char] = rs.to(c)
+ val (begin:Char, end:Char) = if (lower.isEmpty) ('\u0001', '\u0000') //this invalid pair always causes failure
+ else lower.last
+
+ if (begin <= c && c <= end) Success(c, in.rest)
+ else Failure(verboseString(c) + " not in range " +
+ verboseString(begin) + " - " + verboseString(end),
+ in)
+ }
+ }
+
+ /**
+     * Succeeds if the given parser succeeds and the given function is defined at the parse result.
+     * Returns the result of the function applied to the parser's result.
+ */
+ def acceptMatch[S,T](f:PartialFunction[S,T])(p:Parser[S]):Parser[T] = Parser { in =>
+ p(in) match {
+ case Success(result, next) if (f.isDefinedAt(result)) => Success(f(result), next)
+ case Success(result, _) => Failure("Function not defined at " + result, in)
+ case Failure(msg, _) => Failure(msg, in)
+ case Error(msg, _) => Error(msg, in)
+ }
+ }
+
+
+ /////////////////////////////////////////////////////////////////////////
+ // parsers for basic markdown entities like letters, xml fragments ... //
+ /////////////////////////////////////////////////////////////////////////
+ /** a mapping of all chars that have to be escaped in xml and the resulting escape sequence
+ * The array solution is very ugly, but cuts down block parsing time by 25%
+ */
+ private val escapedXmlChars = new Array[String](128)
+ escapedXmlChars('<') = "&lt;"
+ escapedXmlChars('>') = "&gt;"
+ escapedXmlChars('"') = "&quot;"
+ escapedXmlChars('\'') = "&apos;"
+ escapedXmlChars('&') = "&amp;"
+
+ /**
+     * Escapes the given char for XML. Returns either the
+     * necessary XML escape sequence or the same char as a String.
+ */
+ def escapeForXml(c:Char):String = {
+ //looks horrible but massively faster than using a proper map and Option[String]
+ val escaped:String = escapeFastForXml(c)
+ if (escaped == null) Character.toString(c)
+ else escaped
+ }
+
+ /**
+ * Either returns the XML escape sequence for the given char or null.
+ * This does not return Option[String] on purpose. While Option[String]
+ * would be a much cleaner solution, this is actually called so often
+ * that it is a noticeable difference if we use Option here.
+ */
+ def escapeFastForXml(c:Char) = if (c < escapedXmlChars.length) escapedXmlChars(c)
+ else null
+
+ /* A single char. If it is one of the chars that have to be escaped in XML it is returned as the xml escape code
+ * i.e. parsing '<' returns "&lt;"
+ */
+ def aChar = Parser{ in =>
+ if (in.atEnd) {
+ Failure("End of input reached.", in)
+ } else {
+ Success(escapeForXml(in.first), in.rest)
+ }
+ }
+
+ val xmlNameStartCharRanges:SortedMap[Char,Char] =
+ SortedMap(':' -> ':', 'A' -> 'Z', '_' -> '_', 'a' -> 'z', '\u00C0' -> '\u00D6',
+ '\u00D8' -> '\u00F6', '\u00F8' -> '\u02FF', '\u0370' -> '\u037D', '\u037F' -> '\u1FFF',
+ '\u200C' -> '\u200D', '\u2070' -> '\u218F', '\u2C00' -> '\u2FEF', '\u3001' -> '\uD7FF',
+ '\uF900' -> '\uFDCF', '\uFDF0' -> '\uFFFD')//'\u10000' -> '\uEFFFF'
+
+ val xmlNameCharRanges:SortedMap[Char,Char] =
+ xmlNameStartCharRanges ++ SortedMap('-' -> '-', '.' -> '.', '0'->'9',
+ '\u00b7'->'\u00b7', '\u0300' -> '\u0369', '\u203F' -> '\u2040')
+
+ /**Parser for one char that starts an XML name.
+ * According to W3C specs except that range #x10000 to #xEFFFF
+ * is excluded (cannot be expressed by char literals)
+ */
+ def xmlNameStartChar:Parser[Char] = ranges(xmlNameStartCharRanges)
+ /** Parses an XML name char according to W3C spec except that range #x10000 to #xEFFFF is excluded
+ */
+ def xmlNameChar:Parser[Char] = ranges(xmlNameCharRanges)
+ /** Parses an XML name (tag or attribute name)
+ */
+ def xmlName:Parser[String] = xmlNameStartChar ~ (xmlNameChar*) ^^ {case c ~ cs => c + cs.mkString}
+    /** Parses a simplified xml attribute: everything between quotes ("foo").
+     * Everything between the quotes is run through the escape handling.
+     * That way you can omit xml escaping when writing inline XML in markdown.
+ */
+ def xmlAttrVal:Parser[String] = '"' ~> ((not('"') ~> aChar)*) <~ '"' ^^ {'"' + _.mkString + '"'}
+ /** Parses an XML Attribute with simplified value handling like xmlAttrVal.
+ */
+ def xmlAttr:Parser[String] = ws ~ xmlName ~ '=' ~ xmlAttrVal ^^ {
+ case w ~ name ~ _ ~ value => w + name + '=' + value
+ }
+ /** Parses an xml start or empty tag, attribute values are escaped.
+ */
+ def xmlStartOrEmptyTag:Parser[String] = '<' ~> xmlName ~ (xmlAttr*) ~ ows ~ (">" | "/>") ^^ {
+ case name ~ attrs ~ w ~ e => '<' + name + attrs.mkString + w + e
+ }
+
+ /** Parses closing xml tags.
+ */
+ def xmlEndTag:Parser[String] = "</" ~> xmlName <~ ">" ^^ {"</" + _ + ">"}
+
+
+ /** Runs the given parser on the given input.
+ * Expects the parser to succeed and consume all input.
+ * Throws an IllegalArgumentException if parsing failed.
+ */
+ def apply[T](p:Parser[T], in:String):T = {
+ parseAll(p, in) match {
+ case Success(t, _) => t
+ case e: NoSuccess => throw new IllegalArgumentException("Could not parse '" + in + "': " + e)
+ }
+ }
+}
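The generic apply method at the end of BaseParsers runs a parser with parseAll and throws an IllegalArgumentException on failure. A minimal usage sketch, not part of the commit (the object name is invented):

    // Sketch only: exercising one of the XML fragment parsers via apply(parser, input).
    object BaseParsersExample extends BaseParsers {
        def main(args: Array[String]) {
            // attribute values are run through the simplified escape handling
            println(apply(xmlStartOrEmptyTag, "<img src=\"foo.png\" />"))
        }
    }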
413 src/main/scala/eu/henkelmann/actuarius/BlockParsers.scala
@@ -0,0 +1,413 @@
+package eu.henkelmann.actuarius
+
+import collection.immutable.StringOps
+import collection.mutable.ListBuffer
+import xml.{Group, Node, Text, NodeSeq, Elem => XmlElem, TopScope, XML}
+import xml.parsing.XhtmlParser
+import util.parsing.combinator.{Parsers, RegexParsers}
+
+/**
+ * A parser for the markdown language.
+ * Works on pre-parsed lines that can be created by a LineParser.
+ */
+
+trait BlockParsers extends Parsers {
+ type Elem = MarkdownLine
+ //why does this not allow us to access the lookup map in the lookup parser?
+ //override type Input = MarkdownLineReader
+ //hmm, compiler does not accept this, though MarkdownLineReader extends Reader[MarkdownLine]...
+
+ /**
+ * Used to define the output format of parsed blocks and whether verbatim xml blocks are allowed.
+ */
+ def deco():Decorator = Decorator
+
+ /**
+ * returns the current indentation string repeated the given number of levels
+ */
+ def indent(level:Int):String = deco.indentation * level
+
+ private val tokenizer = new LineTokenizer()
+
+ /** A markdown block element.
+ */
+ sealed abstract class MarkdownBlock extends InlineParsers{
+ override def deco = BlockParsers.this.deco
+
+ /** adds the resulting xhtml snippet to the given string builder
+ */
+ def addResult(level:Int, out:StringBuilder):Unit
+ /** returns the resulting xhtml snippet as a string
+ */
+ def result():String = {
+ val sb = new StringBuilder
+ addResult(0, sb)
+ sb.toString
+ }
+ }
+
+
+
+ //////////////////////////
+ // non-recursive blocks //
+ //////////////////////////
+
+    /**
+ * Represents a block of verbatim xml
+ */
+ class VerbatimXml(line:XmlChunk) extends MarkdownBlock {
+ def addResult(level:Int, out:StringBuilder) {out.append(line.content)}
+ }
+
+ /**
+ * Represents a horizontal ruler
+ */
+ object Ruler extends MarkdownBlock {
+ def addResult(level:Int, out:StringBuilder) {out.append(indent(level)).append(deco.decorateRuler)}
+ }
+
+ /**
+ * Represents a header
+ */
+ case class Header(content:String, headerLevel:Int, lookup:Map[String, LinkDefinition]) extends MarkdownBlock{
+ def addResult(level:Int, out:StringBuilder) {
+ out.append(indent(level)).append(deco.decorateHeaderOpen(headerLevel))
+ .append(applyInline(content, lookup))
+ .append(indent(level)).append(deco.decorateHeaderClose(headerLevel))
+ }
+ }
+
+ /**
+   * Represents a block of verbatim quoted code
+ */
+ class CodeBlock(lines:List[MarkdownLine]) extends MarkdownBlock{
+ def addResult(level:Int, out:StringBuilder) {
+ out.append(indent(level)).append(deco.decorateCodeBlockOpen)
+ for (line <- lines) {
+ val escaped = escapeXml(line.payload)
+ out.append(escaped).append('\n')
+ //out.append(line.content)
+ }
+ out.append(indent(level)).append(deco.decorateCodeBlockClose)
+ }
+ }
+
+ /**
+ * Represents a paragraph of text
+ */
+ class Paragraph(lines:List[MarkdownLine], lookup:Map[String, LinkDefinition])
+ extends MarkdownBlock{
+
+ def addResult(level:Int, out:StringBuilder) {
+ out.append(indent(level)).append(deco.decorateParagraphOpen)
+ addResultPlain(level, out)
+ out.append(indent(level)).append(deco.decorateParagraphClose)
+ }
+
+ /**
+         * Adds the result without any decoration (no wrapping tags).
+         * Used for building list items that don't have their content wrapped in paragraphs.
+ */
+ def addResultPlain(level:Int, out:StringBuilder) {
+
+ val temp = new StringBuilder()
+ lines.foreach(line => temp.append(indent(level)).append(line.payload).append('\n'))
+ val result = applyInline(temp.toString, lookup)
+ out.append(result)
+
+ //lines.foreach(line => out.append(indent(level)).append(escapeXml(line.content)))
+
+ //drop last newline so paragraph closing tag ends the line
+ if (!out.isEmpty && out.charAt(out.length-1) == '\n') out.deleteCharAt(out.length-1)
+ }
+ }
+
+ //////////////////////
+ // recursive blocks //
+ //////////////////////
+
+ /**
+ * Represents a quoted text block. Text in the block is recursively evaluated.
+ */
+ class Blockquote(lines:List[MarkdownLine], lookup:Map[String, LinkDefinition])
+ extends MarkdownBlock {
+ def addResult(level:Int, out:StringBuilder) {
+ //the block parser needs to recurse:
+ val innerLines = lines.map(line => line.payload)
+ val reader = BlockParsers.this.tokenizer.innerTokenize(innerLines, lookup)
+ //now apply the normal markdown parser to the new content
+ val innerBlocks = BlockParsers.this.applyBlocks(reader)
+ //wrap the resulting blocks in blockquote tags
+ out.append(indent(level)).append(deco.decorateBlockQuoteOpen)
+ innerBlocks.foreach(block => block.addResult(level+1, out))
+ out.append(indent(level)).append(deco.decorateBlockQuoteClose)
+ }
+ }
+
+ /**
+ * Helper class to build lists. Allows easy checking if an item ends with empty lines and
+ * recursively builds the content of an item.
+ */
+ class ListItem(val lines:List[MarkdownLine], lookup:Map[String, LinkDefinition]) extends LineParsers {
+ def endsWithNewline = lines.size > 1 && (lines.last.isInstanceOf[EmptyLine])
+
+ def addResult(level:Int, out:StringBuilder, paragraph_? : Boolean) {
+ out.append(indent(level)).append(deco.decorateItemOpen)
+ //the block parser needs to recurse:
+ val innerLines = lines.map(line => line.payload)
+ val reader = BlockParsers.this.tokenizer.innerTokenize(innerLines, lookup)
+ //now apply the normal markdown parser to the new content
+ val innerBlocks = BlockParsers.this.applyBlocks(reader)
+ innerBlocks match {
+ case (p:Paragraph) :: Nil if (!paragraph_?) => p.addResultPlain(level+1, out)
+ case _ => innerBlocks.foreach(block => block.addResult(level+1, out))
+ }
+ out.append(indent(level)).append(deco.decorateItemClose)
+ }
+ }
+
+ /**
+ * Base class for ordered and unordered lists, allows for correct handling of paragraphs in lists.
+ */
+ abstract class ListBlock (items:List[ListItem]) extends MarkdownBlock {
+ /**
+ * This method recursively goes through the given list and adds the items contents.
+ * It checks the previous item if it ends with empty lines. If it does, it signals the
+ * current item to create paragraphs. In order for this method to work it has to be
+ * called with the first item prepended twice in front of the list. So if the list is
+ * a::b::c, call this method with a::a::b::c
+ */
+ protected def addResult(level:Int, out:StringBuilder, list:List[ListItem]):Unit = list match{
+ case last::current::rest => {
+ current.addResult(level + 1, out, last.endsWithNewline)
+ addResult(level, out, current::rest)
+ }
+ case _ => {}//end of recursion, list with one item or less
+ }
+
+ /**
+ * calls recursive handling of nested items
+ */
+ def addResult(level:Int, out:StringBuilder) {
+ addResult(level, out, items.head::items)
+ }
+ }
+
+ /**
+ * An ordered (i.e. numbered) list of items.
+ */
+ class OList (items:List[ListItem]) extends ListBlock(items) {
+ override def addResult(level:Int, out:StringBuilder) {
+ out.append(indent(level)).append(deco.decorateOListOpen)
+ super.addResult(level, out)
+ out.append(indent(level)).append(deco.decorateOListClose)
+ }
+ }
+
+ /**
+ * An unordered list of items.
+ */
+ class UList (items:List[ListItem]) extends ListBlock(items) {
+ override def addResult(level:Int, out:StringBuilder) {
+ out.append(indent(level)).append(deco.decorateUListOpen)
+ super.addResult(level, out)
+ out.append(indent(level)).append(deco.decorateUListClose)
+ }
+ }
+
+
+ /////////////////////////////////////////////////////////////
+ //////////////// helpers /////////////////////
+ /////////////////////////////////////////////////////////////
+
+ /**
+ * Parses a line of the given type T
+ */
+ def line[T](c:Class[T]):Parser[T] = Parser {in =>
+ if (in.first.getClass == c) Success(in.first.asInstanceOf[T], in.rest)
+ else Failure("Not a fitting line.", in)
+ }
+
+ /**
+ * Parses any line.
+ */
+ def anyLine:Parser[MarkdownLine] = Parser {in =>
+ if (in.atEnd) Failure("End of input reached.", in)
+ else Success(in.first, in.rest)
+ }
+
+ def emptyLine:Parser[EmptyLine] = line(classOf[EmptyLine])
+
+ /**accepts zero or more empty lines
+ */
+ def optEmptyLines:Parser[List[MarkdownLine]] = emptyLine*
+
+ /** accepts one or more empty lines
+ */
+ def emptyLines:Parser[List[MarkdownLine]] = emptyLine+
+
+ /** returns the current link lookup from the reader
+ * always succeeds, never consumes input
+ */
+ def lookup:Parser[Map[String, LinkDefinition]] = Parser { in =>
+ //why is the instanceof necessary? re-declaring type Input above does not change anything :(
+ Success(in.asInstanceOf[MarkdownLineReader].lookup, in)
+ }
+
+ ///////////////////
+ // Block parsers //
+ ///////////////////
+
+ def atxHeader:Parser[Header] = line(classOf[AtxHeaderLine]) ~ lookup ^^ {
+ case l ~ lu => new Header(l.trimHashes, l.headerLevel, lu)
+ }
+
+ def setExtHeader:Parser[Header] =
+ not(emptyLine) ~> anyLine ~ line(classOf[SetExtHeaderLine]) ~ lookup ^^
+ {case l ~ setext ~ lu => new Header(l.fullLine.trim, setext.headerLevel, lu)}
+
+ /** parses a horizontal ruler
+ */
+ def ruler:Parser[MarkdownBlock] = (line(classOf[RulerLine]) | line(classOf[SetExtHeaderLine])) ^^^ {Ruler}
+
+ /** parses a verbatim xml block
+ */
+ def verbatimXml:Parser[VerbatimXml] = line(classOf[XmlChunk]) ^^ {new VerbatimXml(_)}
+
+ /** parses a code block
+ */
+ def codeBlock:Parser[CodeBlock] = line(classOf[CodeLine]) ~ ((optEmptyLines ~ line(classOf[CodeLine]))*) ^^ {
+ case l ~ pairs => new CodeBlock( l :: pairs.map({case (a~b) => a++List(b)}).flatten )
+ }
+
+
+ /** a consecutive block of paragraph lines
+ * returns the content of the matched block wrapped in <p> tags
+ */
+ def paragraph:Parser[Paragraph] = lookup ~ (line(classOf[OtherLine])+) ^^ {case lu ~ ls => new Paragraph(ls, lu)}
+
+ /**
+ * Parses a blockquote fragment: a block starting with a blockquote line followed
+     * by more blockquote or paragraph lines, optionally ending with empty lines
+ */
+ def blockquoteFragment:Parser[List[MarkdownLine]] =
+ line(classOf[BlockQuoteLine]) ~ ((line(classOf[BlockQuoteLine]) | line(classOf[OtherLine]))*) ~ (optEmptyLines) ^^ {
+ case l ~ ls ~ e => (l :: ls ++ e)
+ }
+
+ /**
+ * Parses a quoted block. A quoted block starts with a line starting with "> "
+     * followed by more blockquote lines or paragraph lines that follow blockquote lines,
+     * and may be interspersed with empty lines
+ */
+ def blockquote:Parser[Blockquote] = lookup ~ (blockquoteFragment+) ^^ {
+ case lu ~ fs => new Blockquote(fs.flatten, lu)
+ }
+
+
+ /**
+ * parses a list of lines that may make up the body of a list item
+ */
+ def itemLines:Parser[List[MarkdownLine]] = ((line(classOf[CodeLine])|line(classOf[OtherLine]))*)
+
+ /**
+ * The continuation of a list item:
+ * A line indented by four spaces or a tab (a continuation line), followed by more continuation or paragraph
+ * lines followed by empty lines
+ */
+ def itemContinuation:Parser[List[MarkdownLine]] =
+ optEmptyLines ~ line(classOf[CodeLine]) ~ itemLines ^^ {
+ case e ~ c ~ cs => e ++ (c :: cs)
+ }
+
+    /**parses an item in an unordered list
+ */
+ def uItem:Parser[ListItem] = lookup ~ line(classOf[UItemStartLine]) ~ itemLines ~ (itemContinuation*) ~ optEmptyLines ^^ {
+ case lu ~ s ~ ls ~ cs ~ e => new ListItem(s :: ls ++ cs.flatten ++ e, lu)
+ }
+
+    /**parses an item in an ordered list
+ */
+ def oItem:Parser[ListItem] = lookup ~ line(classOf[OItemStartLine]) ~ itemLines ~ (itemContinuation*) ~ optEmptyLines ^^ {
+ case lu ~ s ~ ls ~ cs ~ e => new ListItem(s :: ls ++ cs.flatten ++ e, lu)
+ }
+
+ /** parses an unordered list
+ */
+ def uList:Parser[UList] = (uItem+) ^^ {new UList(_)}
+
+ /** parses an ordered list
+ */
+ def oList:Parser[OList] = (oItem+) ^^ {new OList(_)}
+
+
+ ///////////////////////////////////////////////////////////////
+ /////////////////// high level processing /////////////////////
+ ///////////////////////////////////////////////////////////////
+
+ /**
+ * parses first level blocks (all blocks, including xml)
+ */
+ def outerBlock:Parser[MarkdownBlock] = (verbatimXml <~ optEmptyLines) | innerBlock
+
+ /**
+ * speed up block processing by looking ahead
+ */
+ def fastBlock:Parser[MarkdownBlock] = Parser { in =>
+ if (in.atEnd) {
+ Failure("End of Input.", in)
+ } else {
+ in.first match {
+ case l:AtxHeaderLine => atxHeader(in)
+ case l:RulerLine => ruler(in)
+ //setext headers have been processed before we are called, so this is safe
+ case l:SetExtHeaderLine => ruler(in)
+ case l:CodeLine => codeBlock(in)
+ case l:BlockQuoteLine => blockquote(in)
+ case l:OItemStartLine => oList(in)
+ case l:UItemStartLine => uList(in)
+ case _ => paragraph(in)
+ }
+ }
+ }
+
+ /**
+ * parses inner blocks (everything excluding xml)
+ */
+ def innerBlock:Parser[MarkdownBlock] = (setExtHeader | fastBlock) <~ optEmptyLines
+
+ /**
+ * a markdown parser
+ */
+ def markdown:Parser[List[MarkdownBlock]] = optEmptyLines ~> (outerBlock*)
+
+    /** Generic apply method to run one of our parsers on the given input.
+ */
+ def apply[T](p:Parser[T], in:MarkdownLineReader):T = {
+ phrase(p)(in) match {
+ case Success(t, _) => t
+ case e: NoSuccess => throw new IllegalArgumentException("Could not parse '" + in + "': " + e)
+ }
+ }
+
+ /** parses all blocks from the given reader
+ */
+ def applyBlocks(in:MarkdownLineReader):List[MarkdownBlock] = apply((optEmptyLines ~> (innerBlock*)), in)
+
+ /** Generic apply method to test a single parser
+ */
+ def apply[T](p:Parser[T], list:List[MarkdownLine]):T = apply(p, new MarkdownLineReader(list))
+
+ /** Parses the given input as a markdown document and returns the string result
+ */
+ def apply(in:MarkdownLineReader):String = {
+ phrase(markdown)(in) match {
+ case Success(bs, _) => {
+ val builder = new StringBuilder()
+ bs.foreach(block => block.addResult(0, builder))
+ builder.toString
+ }
+ case e: NoSuccess => throw new IllegalArgumentException("Could not parse " + in + ": " + e)
+ }
+ }
+}
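The block parsers consume pre-tokenized MarkdownLines through a MarkdownLineReader (both defined further down in LineParsers.scala). A hedged sketch of driving them directly, not part of the commit (object name invented):

    // Sketch only: render a one-line document through the block parsers with the default Decorator.
    object BlockParsersExample extends BlockParsers {
        def main(args: Array[String]) {
            val reader = new MarkdownLineReader(List(OtherLine("Hello *world*")))
            // apply(reader) parses all blocks and returns the rendered string,
            // which should come out roughly as <p>Hello <em>world</em></p>
            println(apply(reader))
        }
    }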
89 src/main/scala/eu/henkelmann/actuarius/Decorator.scala
@@ -0,0 +1,89 @@
+package eu.henkelmann.actuarius
+
+/**
+ * This trait influences the behavior of the Markdown output of inline and block parsers
+ * and the complete transformer.
+ * Mix in this trait and override its methods to change the generated output, then override the "deco()" method
+ * in the respective parser/transformer so that it returns
+ * your modified instance.
+ *
+ * Inline element decoration methods always get passed the spanned text, so you have to
+ * prepend and append the opening/closing tags. For block elements there is always a method
+ * for the opening and closing tags. This is to make block
+ * processing more efficient to prevent unnecessary String building of whole blocks just to
+ * add tags. (The block building uses a StringBuilder internally and just appends the returned tags)
+ *
+ * If you want line breaks after opening/closing block level tags, you have to add the newline yourself.
+ */
+
+trait Decorator {
+ /**
+     * The string used to indent one level. Defaults to the empty string.
+ */
+ def indentation() = ""
+ /**
+ * If true, inline xml tags and verbatim xml blocks are allowed,
+ * otherwise they are escaped and included as plain text
+ */
+ def allowVerbatimXml():Boolean = true
+ /** used to print out manual line breaks (default: <br />)
+ */
+ def decorateBreak():String = "<br />"
+ /** used to print out inline code (default: <code>...</code>)
+ */
+ def decorateCode(code:String):String = "<code>" + code + "</code>"
+ /** used to print out emphasized text (default <em>...</em>)
+ */
+ def decorateEmphasis(text:String):String = "<em>" + text + "</em>"
+    /** Used to print out strong text (default: <strong>...</strong>)
+ */
+ def decorateStrong(text:String):String = "<strong>" + text + "</strong>"
+ /** Used to print link elements (default: <a href...)
+ */
+ def decorateLink(text:String, url:String, title:Option[String]):String = title match {
+ case None => "<a href=\"" + url + "\">" + text + "</a>"
+ case Some(t) => "<a href=\"" + url + "\" title=\"" + t + "\">" + text + "</a>"
+ }
+ /** Used to print image elements (default: <img ...)
+ */
+ def decorateImg(alt:String, src:String, title:Option[String]):String = title match {
+ case None => "<img src=\"" + src + "\" alt=\"" + alt + "\" />"
+ case Some(t) => "<img src=\"" + src + "\" alt=\"" + alt + "\" title=\"" + t + "\" />"
+ }
+    /**used to print a horizontal ruler, defaults to "<hr />\n" */
+ def decorateRuler():String = "<hr />\n"
+ /** used to print the beginning of a header, defaults to "<h[headerNo]>" */
+ def decorateHeaderOpen(headerNo:Int):String = "<h" + headerNo + ">"
+    /** used to print the end of a header, defaults to "</h[headerNo]>\n" */
+ def decorateHeaderClose(headerNo:Int):String = "</h" + headerNo + ">\n"
+ /** used to print the beginning of a code block, defaults to "<pre><code>"*/
+ def decorateCodeBlockOpen():String = "<pre><code>"
+ /** used to print the end of a code block, defaults to "</code></pre>\n" */
+ def decorateCodeBlockClose():String = "</code></pre>\n"
+ /** used to print the beginning of a paragraph, defaults to "<p>" */
+ def decorateParagraphOpen():String = "<p>"
+ /** used to print the end of a paragraph, defaults to "</p>\n" */
+ def decorateParagraphClose():String = "</p>\n"
+ /** used to print the beginning of a blockquote, defaults to "<blockquote>" */
+ def decorateBlockQuoteOpen():String = "<blockquote>"
+ /** used to print the end of a blockquote, defaults to "</blockquote>\n" */
+ def decorateBlockQuoteClose():String = "</blockquote>\n"
+ /** used to print the beginning of a list item, defaults to "<li>" */
+ def decorateItemOpen():String = "<li>"
+ /** used to print the end of a list item, defaults to "</li>" */
+ def decorateItemClose():String = "</li>\n"
+ /** used to print the beginning of an unordered list, defaults to "<ul>\n" */
+ def decorateUListOpen():String = "<ul>\n"
+ /** used to print the end of an unordered list, defaults to "</ul>\n" */
+ def decorateUListClose():String = "</ul>\n"
+ /** used to print the beginning of an ordered list, defaults to <ol>\n */
+ def decorateOListOpen():String = "<ol>\n"
+ /** used to print the end of an ordered list, defaults to </ol>\n */
+ def decorateOListClose():String = "</ol>\n"
+}
+
+/**
+ * Default instance of Decorator with the standard Markdown behavior
+ */
+object Decorator extends Decorator {
+}
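The scaladoc above describes customization by overriding individual methods and returning the modified instance from deco(). A hypothetical customization sketch, not part of the commit (name and choices are illustrative):

    // Sketch only: indent nested blocks and disable verbatim XML pass-through.
    object StrictDecorator extends Decorator {
        override def indentation() = "  "                // two spaces per nesting level
        override def allowVerbatimXml():Boolean = false  // XML is escaped instead of passed through
        override def decorateParagraphOpen():String = "<p class=\"md\">"
    }
    // A parser mixing in BlockParsers/InlineParsers would then use it via:
    //     override def deco() = StrictDecorator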
368 src/main/scala/eu/henkelmann/actuarius/InlineParsers.scala
@@ -0,0 +1,368 @@
+package eu.henkelmann.actuarius
+
+
+
+/**
+ * A parser for inline markdown, markdown escapes and XML escapes.
+ * This is used by the result classes of the block parsers to handle
+ * Markdown within a block.
+ */
+
+trait InlineParsers extends BaseParsers {
+
+ /**
+ * Defines how the output is formatted and whether inline xml elements are allowed.
+ */
+ def deco():Decorator = Decorator
+
+ /////////////////////////////////////
+ // Types we use for inline parsing //
+ /////////////////////////////////////
+
+ /**
+ * Defines a lookup map for link definitions.
+ */
+ type LinkMap = Map[String, LinkDefinition]
+
+ /**
+ * A set of tags we have already created. Used to prevent nesting a link in a link or an emphasis in an emphasis.
+ */
+ type VisitedTags = Set[String]
+
+ /**
+ * Keeps track of visited tags and provides a lookup for link ids.
+ */
+ case class InlineContext(val map:LinkMap, val tags:VisitedTags) {
+ def this(m:LinkMap) = this(m, Set())
+ def this() = this(Map())
+ def addTag(tag:String) = new InlineContext(map, tags + tag)
+ }
+
+ /** This array is used as a lookup for mapping markdown escapes
+ * to the resulting char (if necessary already escaped for XML)
+ * Disgusting, I know, but this is such an often called operation
+ * that this is the fastest way to do it, even in the year 2010.
+ */
+ private val escapableMarkdownChars = new Array[String](127)
+ escapableMarkdownChars('\\') = "\\"
+ escapableMarkdownChars('`') = "`"
+ escapableMarkdownChars('*') = "*"
+ escapableMarkdownChars('_') = "_"
+ escapableMarkdownChars('{') = "{"
+ escapableMarkdownChars('}') = "}"
+ escapableMarkdownChars('[') = "["
+ escapableMarkdownChars(']') = "]"
+ escapableMarkdownChars('(') = "("
+ escapableMarkdownChars(')') = ")"
+ escapableMarkdownChars('#') = "#"
+ escapableMarkdownChars('+') = "+"
+ escapableMarkdownChars('-') = "-"
+ escapableMarkdownChars('=') = "="
+ escapableMarkdownChars('>') = "&gt;"
+ escapableMarkdownChars('.') = "."
+ escapableMarkdownChars('!') = "!"
+
+ /**
+     * Parses markdown text up to any of the chars defined in the given set.
+     * Used to quickly consume and escape any text between special inline markdown
+     * elements like emphasis.
+ */
+ def markdownText(special:Set[Char], markdownEscapes:Boolean) = Parser{ in =>
+ if (in.atEnd) {
+ Failure("End of input.", in)
+ } else {
+ var start = in.offset
+ var i = in.offset
+ val s = in.source
+ val end = s.length
+ val result = new StringBuffer()
+ //process chars until we hit a special char or the end
+ while (i<end && !special.contains(s.charAt(i))) {
+ val c = s.charAt(i)
+ val xmlEscape = escapeFastForXml(c)
+ if (markdownEscapes && c == '\\' && i+1 < end && escapableMarkdownChars(s.charAt(i+1))!=null) {
+ result.append(s.subSequence(start, i).toString)
+ result.append(escapableMarkdownChars(s.charAt(i+1)))
+ i += 2
+ start = i
+ } else if (xmlEscape != null) {
+ result.append(s.subSequence(start, i).toString)
+ result.append(xmlEscape)
+ i += 1
+ start = i
+ } else {
+ i += 1
+ }
+ }
+ if (start != i) result.append(s.subSequence(start, i).toString)
+ if (result.length == 0) Failure("No text consumed.", in)
+ else Success(result.toString(), in.drop(i - in.offset))
+ }
+ }
+
+ /**
+ * all markdown inline element parsers or'ed together
+ */
+ //this was SLOW AS HELL
+ //def elementParsers(ctx:InlineContext) = Parser { inline =>
+ //markdownEscape | br | code | xmlTag | //simple inline
+ //a(ctx) | strong(ctx) | em(ctx) | fastA(ctx) | refA(ctx) | img(ctx) //recursive inline
+
+ //}
+ /* explicit match is faster than the map lookup
+ private val elementParserLookup:Map[Char,(InlineContext=>Parser[String])] = Map(
+ '\\' -> (ctx => aChar), ' ' -> (ctx => br), '`' -> (ctx => code), '<' -> (ctx => xmlTag | fastA(ctx)),
+ '[' -> (ctx => a(ctx) | refA(ctx)), '*' -> (ctx => spanAsterisk(ctx)), '_' -> (ctx => spanUnderscore(ctx)),
+ '!' -> (ctx => img(ctx))
+ )
+ */
+
+ //TODO:better handling of " \n" here. Stopping at every space costs us 20% time!
+ /** Chars that may indicate the start of a special Markdown inline sequence.
+ */
+ val specialInlineChars = Set(' ', '`', '<', '[', '*', '_', '!')
+ /** Chars that may indicate the start of a special markdown inline sequence or the end of a link text.
+ */
+ val specialLinkInlineChars = specialInlineChars + ']'
+
+ /** Hand rolled parser that parses a chunk of special inline markdown (like links or emphasis)
+ * based on a one char lookahead.
+ */
+ def elementParsers(ctx:InlineContext) = Parser{ in =>
+ if (in.atEnd) {
+ Failure("End of Input Reached", in)
+ } else {
+ in.first match {
+ case ' ' => br(in)
+ case '`' => code(in)
+ case '<' => (xmlTag | fastLink(ctx))(in)
+ case '[' => link(ctx)(in)
+ case '*' => spanAsterisk(ctx)(in)
+ case '_' => spanUnderscore(ctx)(in)
+ case '!' => img(ctx)(in)
+ case _ => Failure("Lookahead does not start inline element.", in)
+ }
+ }
+ }
+
+ /** Parses a single inline token. Either a span element or a chunk of text.
+ */
+ def oneInline(ctx:InlineContext):Parser[String] =
+ markdownText(specialInlineChars, true) | elementParsers(ctx) | aChar
+
+ /** Parser for inline markdown, always consumes all input, returns the resulting HTML.
+ */
+ def inline(m:LinkMap):Parser[String] = (oneInline(new InlineContext(m))*) ^^ {_.mkString}
+
+
+
+ ///////////////////////////////////////////////////////////
+ // Inline Elements: //
+ // br,code,xml tag,fast link,link,image,emphasis,strong, text chunk //
+ ///////////////////////////////////////////////////////////
+
+ /** Parses two spaces at the end of a line to a manual break (<br/>)
+ */
+ val br:Parser[String] = (" \n") ^^^ {deco.decorateBreak() + "\n"}
+
+
+ /** Parses an inline code element.
+ * An inline code element is surrounded by single backticks ("`")
+ * or double backticks ("``").
+ */
+ val code:Parser[String] = ((("``" ~> ((not("``")~> aChar)+) <~ "``")^^{_.mkString}) |
+ ('`' ~> markdownText(Set('`'), false) <~ '`') ) ^^ {
+ c => deco.decorateCode(c.mkString)
+ }
+
+
+ /** Parses any xml tag and escapes attribute values.
+ */
+ val xmlTag:Parser[String] = if (deco.allowVerbatimXml) (xmlEndTag | xmlStartOrEmptyTag)
+ else failure("Inline XML processing disabled.")
+
+
+ /** A shortcut markdown link of the form <http://example.com>
+ */
+ def fastLink(ctx:InlineContext):Parser[String] =
+ if (ctx.tags.contains("a")){
+ failure("Cannot nest a link in a link.")
+ } else {
+ elem('<') ~> markdownText(Set('>',' ', '<', '\n'), true) <~ '>' ^^ { u => deco.decorateLink(u, u, None) }
+ }
+
+    /** A link started by square brackets, either a reference or a link with the full URL.
+ */
+ def link(ctx:InlineContext):Parser[String] = fullLink(ctx) | referenceLink(ctx)
+
+ /** A markdown link with the full url given.
+ */
+ def fullLink(ctx:InlineContext):Parser[String] =
+ if (ctx.tags.contains("a")){
+ failure("Cannot nest a link in a link.")
+ } else {
+ '[' ~> linkInline(ctx.addTag("a")) ~ ("](" ~ ows) ~ url ~ ows ~ title <~ (ows ~ ')') ^^ {
+ case txt ~ _ ~ u ~ _ ~ ttl => deco.decorateLink(txt, u, ttl)
+ }
+ }
+
+ /** A markdown link which references an url by id.
+ */
+ def referenceLink(ctx:InlineContext):Parser[String] =
+ if (ctx.tags.contains("a")){
+ failure("Cannot nest a link in a link.")
+ } else {
+ ref(ctx.addTag("a")) ^^ {
+ case (LinkDefinition(_, u, ttl), txt) => deco.decorateLink(txt, u, ttl)
+ }
+ }
+
+ /** Inline markdown in a link. Like normal inline stuff but stops when it reaches a closing square bracket.
+ */
+ def linkInline(ctx:InlineContext):Parser[String] = //( (not(']') ~> oneInline(ctx.addTag("a")))* ) ^^ {_.mkString}
+ ((markdownText(specialLinkInlineChars, true) | elementParsers(ctx) | ((not(']') ~> aChar)))*) ^^ {_.mkString}
+
+    /** We parse everything as a link/img url until we hit whitespace or a closing parenthesis.
+ */
+ val url:Parser[String] = markdownText(Set(')', ' ', '\t'), true)
+
+ /** A title is everything in quotation marks. We allow even quotation marks in quotation marks.
+     * We look ahead for the closing parenthesis after the quotation marks to detect whether the title
+     * ends or not.
+ */
+ val title:Parser[Option[String]] = opt('"' ~> ((markdownText(Set('"'),true) ~ opt(not('"'~ows~')') ~> aChar))*) <~ '"') ^^ {
+ case None => None
+ case Some(chunks) => {
+ val result = new StringBuilder()
+ for (chunk <- chunks) { chunk match {
+ case (text) ~ None => result.append(text)
+ case (text) ~ Some(s) => result.append(text).append(s)
+ } }
+ Some(result.toString)
+ }
+ }
+
+    /** Plaintext variant of refInline. Escapable text until a square bracket is hit.
+ */
+ val refText:Parser[String] = markdownText(Set(']'), true)
+
+ /** Parses an id reference. (Any text that is not a square bracket)
+ * Succeeds only if the parsed id is found in the given lookup.
+ * Returns the found link definition and the matched text.
+ */
+ def idReference(ctx:InlineContext):Parser[(String, LinkDefinition)] =
+ guard(acceptMatch(ctx.map)(refText ^^ (_.trim.toLowerCase))) ~ refText ^^ {case ld ~ t => (t, ld)}
+ /**
+ * A markdown reference of the form [text][id], [idText][] or [idText]
+ * Parser returns a tuple with the link definition first and the text to display second.
+ */
+ def ref(ctx:InlineContext):Parser[(LinkDefinition, String)] =
+ ('[' ~> linkInline(ctx) ~ (']' ~ opt(' ') ~ '[') ~ idReference(ctx) <~ ']' ^^ {
+ case t ~ dummy ~ pair => (pair._2, t)} ) |
+ ('[' ~> idReference(ctx) <~ (']' ~ opt(opt(' ') ~ '[' ~ ows ~ ']')) ^^ {
+ case (t, ld) => (ld, t)} )
+
+ /**
+ * Parses either a referenced or a directly defined image.
+ */
+ def img(ctx:InlineContext):Parser[String] = elem('!') ~> (directImg | refImg(ctx))
+
+ /** An image with an explicit path.
+ */
+ val directImg:Parser[String] =
+ elem('[') ~> refText ~ ("](" ~ ows) ~ url ~ ows ~ title <~ (ows ~ ')') ^^ {
+ case altText ~ _ ~ path ~ _ ~ ttl => deco.decorateImg(altText, path, ttl)
+ }
+ /**
+ * Parses a referenced image.
+ */
+ def refImg(ctx:InlineContext):Parser[String] = ref(ctx) ^^ {
+ case (LinkDefinition(_, u, ttl), alt) => deco.decorateImg(alt, u, ttl)
+ }
+
+    /** Parses inline content in a span element (like strong, emphasis or a link) up until the given end marker
+ */
+ def spanInline(end:Parser[Any], ctx:InlineContext):Parser[String] =
+ (markdownText(specialInlineChars, true) | elementParsers(ctx) | (not(end) ~> aChar)) ^^ {_.mkString}
+
+ /** Parses a span element like __foo__ or *bar*
+ */
+ def span(limiter:String, ctx:InlineContext):Parser[String] =
+ (limiter~not(ws))~>
+ (spanInline( (not(lookbehind(Set(' ', '\t', '\n'))) ~ limiter), ctx)+) <~
+ limiter ^^ {
+ _.mkString
+ }
+
+ /** Either an emphasis or a strong text wrapped in asterisks.
+ */
+ def spanAsterisk (ctx:InlineContext) = strongAsterisk(ctx) | emAsterisk(ctx)
+
+ /** Either an emphasis or strong text wrapped in underscores.
+ */
+ def spanUnderscore(ctx:InlineContext) = strongUnderscore(ctx) | emUnderscore(ctx)
+
+ /**Parses emphasized text wrapped in asterisks: *foo*
+ */
+ def emAsterisk(ctx:InlineContext):Parser[String] =
+ if (ctx.tags.contains("em")) {
+ failure("Cannot nest emphasis.")
+ } else {
+ span("*", ctx.addTag("em")) ^^ { deco.decorateEmphasis(_) }
+ }
+
+
+ /**Parses emphasized text wrapped in underscores: _foo_
+ */
+ def emUnderscore(ctx:InlineContext):Parser[String] =
+ if (ctx.tags.contains("em")) {
+ failure("Cannot nest emphasis.")
+ } else {
+ span("_", ctx.addTag("em")) ^^ { deco.decorateEmphasis(_) }
+ }
+
+ /**Parses strong text in asterisks: **foo**
+ */
+ def strongAsterisk(ctx:InlineContext):Parser[String] =
+ if (ctx.tags.contains("strong")) {
+ failure("Cannot nest strong text.")
+ } else {
+ span("**", ctx.addTag("strong")) ^^ { deco.decorateStrong(_) }
+ }
+
+ /**Parses strong text in underscores: __foo__
+ */
+ def strongUnderscore(ctx:InlineContext):Parser[String] =
+ if (ctx.tags.contains("strong")) {
+ failure("Cannot nest strong text.")
+ } else {
+ span("__", ctx.addTag("strong")) ^^ { deco.decorateStrong(_) }
+ }
+
+
+ /**
+ * Runs the inline parser on the given input and returns the result
+ */
+ def applyInline(s:String, m:LinkMap) = apply(inline(m), s)
+
+ /**
+     * Escapes the given string so it can be embedded in xml.
+ * Markdown escapes are not processed.
+ */
+ def escapeXml(s:String) = {
+ var i = 0
+ val end = s.length
+ val result = new StringBuffer()
+ //process chars until we hit a special char or the end
+ while (i<end) {
+ val out = s.charAt(i)
+ //if it is a an xml reserved char, xml escape it, else just add it
+ val xmlEscape = escapeFastForXml(out)
+ if (xmlEscape != null) result.append(xmlEscape)
+ else result.append(out)
+ //advance a char
+ i += 1
+ }
+ result.toString
+ }
+}
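The inline parsers can be exercised on their own through applyInline, which takes the text and a link-definition lookup. A small sketch, not part of the commit (object name invented):

    // Sketch only: inline rendering with an empty link lookup and the default Decorator.
    object InlineParsersExample extends InlineParsers {
        def main(args: Array[String]) {
            // markdown spans are decorated, reserved XML chars in plain text are escaped,
            // so this should print roughly: this is <strong>strong</strong> &amp; <em>emphasized</em>
            println(applyInline("this is **strong** & *emphasized*", Map()))
            println(escapeXml("<not a tag>")) // plain XML escaping, no markdown processing
        }
    }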
235 src/main/scala/eu/henkelmann/actuarius/LineParsers.scala
@@ -0,0 +1,235 @@
+package eu.henkelmann.actuarius
+
+import scala.util.parsing.input.{Position, Reader}
+import java.util.StringTokenizer
+import scala.collection.mutable.{HashMap, ArrayBuffer, ListBuffer}
+
+
+/**
+ * Represents a line of markdown.
+ * The prefix is the beginning of the line that indicates the line type,
+ * the payload is the actual content after the prefix.
+ */
+sealed abstract class MarkdownLine(val prefix:String, val payload:String){
+ /**
+ * Constructs a MarkdownLine where the prefix is the empty String and the
+ * payload is the whole line.
+ */
+ def this(c:String) = this ("", c)
+
+ /**
+ * Returns the full line as it was originally, i.e. prefix+payload.
+ */
+ def fullLine = prefix + payload
+}
+
+/**Represents lines of verbatim xml.
+ * Actually this class is a little cheat, as it represents multiple lines.
+ * But it is a token that is created when "parsing with a line scope", so it is not too bad.
+ */
+case class XmlChunk(content:String) extends MarkdownLine(content)
+/** Represents the underline for a setext style header
+ */
+case class SetExtHeaderLine(content:String, headerLevel:Int) extends MarkdownLine(content)
+
+/**
+ * An atx style header line.
+ * Trims hashes automatically and determines the header level from them.
+ */
+case class AtxHeaderLine(pre:String, pay:String) extends MarkdownLine(pre, pay) {
+ /** removes all whitespace, nl and trailing hashes from the payload
+ * " foo ## \n" => "foo"
+ */
+ def trimHashes() = {
+ val s = payload.trim
+ var idx = s.length - 1
+ while (idx >= 0 && s.charAt(idx) == '#') idx -= 1
+ s.substring(0,idx+1).trim
+ }
+
+ def headerLevel = prefix.length
+}
+/** A line consisting only of whitespace.
+ */
+case class EmptyLine(content:String) extends MarkdownLine(content)
+/** A horizontal ruler line.
+ */
+case class RulerLine(content:String) extends MarkdownLine(content)
+/** A line indicating a block quote (starts with "> ")
+ */
+case class BlockQuoteLine(pre:String, pay:String) extends MarkdownLine(pre, pay)
+/** A line indicating the start of an unordered list item (starts with " * ")
+ */
+case class UItemStartLine(pre:String, pay:String) extends MarkdownLine(pre, pay)
+/** A line indicating the start of an ordered list item (starts with " [NUMBER]. ")
+ */
+case class OItemStartLine(pre:String, pay:String) extends MarkdownLine(pre, pay)
+/** A line in verbatim code or the continuation of a list item
+ */
+case class CodeLine(pre:String, pay:String) extends MarkdownLine(pre, pay)
+/** Any other line.
+ */
+case class OtherLine(content:String) extends MarkdownLine(content)
+
+
+/** Definition of a link or url that can be referenced by id.
+ */
+case class LinkDefinition(id:String, url:String, title:Option[String])
+
+/** Stub class that is an intermediate result when parsing link definitions.
+ */
+case class LinkDefinitionStart(id:String, url:String) {
+ def toLinkDefinition(title:Option[String]) = new LinkDefinition(id, url, title)
+}
+
+/**
+ * This class allows us to reference a map with link definitions resulting from the line parsing during block parsing.
+ * It extends a Reader for MarkdownLines and allows us to add the said map to the parsing context.
+ * This is basically a modification of the parser monad's state.
+ */
+case class MarkdownLineReader private (val lines:Seq[MarkdownLine],
+ val lookup:Map[String, LinkDefinition],
+ val lineCount:Int)
+ extends Reader[MarkdownLine] {
+    /** A non-existent line that signals EOF.
+ * This object cannot be referenced by any other code so it will fail all line parsers.
+ */
+ private object EofLine extends MarkdownLine("\nEOF\n")
+
+
+ def this(ls:Seq[MarkdownLine], lu:Map[String, LinkDefinition]) = this(ls, lu, 1)
+ def this(ls:Seq[MarkdownLine]) = this (ls, Map())
+ def first = if (lines.isEmpty) EofLine else lines.head
+ def rest = if (lines.isEmpty) this else new MarkdownLineReader(lines.tail, lookup, lineCount + 1)
+ def atEnd = lines.isEmpty
+ def pos = new Position {
+ def line = lineCount
+ def column = 1
+ protected def lineContents = first.fullLine
+ }
+}
+
+/**
+ * Parses single lines into tokens.
+ * Markdown lines are differentiated by their beginning.
+ * These lines are then organized in blocks by the BlockParsers.
+ */
+trait LineParsers extends InlineParsers {
+
+ /////////////////////////////////
+ // Link definition pre-parsing //
+ /////////////////////////////////
+
+ /** The Start of a link definition: the id in square brackets, optionally indented by three spaces
+ */
+ def linkDefinitionId:Parser[String] =
+ """ {0,3}\[""".r ~> markdownText(Set(']'), true) <~ ("]:" ~ ows) ^^ {_.trim.toLowerCase}
+ /** The link url in a link definition.
+ */
+ def linkDefinitionUrl:Parser[String] =
+ (elem('<') ~> markdownText(Set('>'), true) <~ '>' ^^ {_.mkString.trim}) |
+ (markdownText(Set(' ','\t'), true) ^^ {_.mkString})
+ /** The title in a link definition.
+ */
+ def linkDefinitionTitle:Parser[String] =
+ ows ~> ("""\"[^\n]*["]""".r |
+ """\'[^\n]*\'""".r |
+ """\([^\n]*\)""".r) <~ ows ^^ { s => s.substring(1,s.length-1) }
+
+ /** A link definition that later gets stripped from the output.
+ * Either a link definition on one line or the first line of a two line link definition.
+ */
+ def linkDefinitionStart:Parser[(LinkDefinitionStart, Option[String])] =
+ linkDefinitionId ~ linkDefinitionUrl ~ opt(linkDefinitionTitle) ^^ {case i ~ u ~ t => (new LinkDefinitionStart(i, u), t)}
+
+
+ //////////////////////////////////////////
+ // Lines for XML Block tokenizing //
+ //////////////////////////////////////////
+
+ /** A line that starts an xml block: an opening xml element fragment.
+ */
+ def xmlBlockStartLine:Parser[String] = guard('<' ~ xmlName) ~> rest
+ /** A line that ends an xml block: a line starting with an xml end tag
+ */
+ def xmlBlockEndLine:Parser[String] = guard(xmlEndTag) ~> rest
+ /** A line not starting with an xml end tag
+ */
+ def notXmlBlockEndLine:Parser[String] = not(xmlEndTag) ~> rest
+
+
+ //////////////////////////////
+ // Markdown line tokenizing //
+ //////////////////////////////
+
+ /** Parses the line under a setext style level 1 header: =====
+ */
+ val setextHeader1:Parser[SetExtHeaderLine] = """=+([ \t]*)$""".r ^^ {new SetExtHeaderLine(_, 1)}
+
+ /** Parses the line under a setext style level 2 header: -----
+ */
+ val setextHeader2:Parser[SetExtHeaderLine] = """((\-)+)([ \t]*)$""".r ^^ {new SetExtHeaderLine(_, 2)}
+
+ /** Parses headers of the form: ### header ###
+ */
+ val atxHeader:Parser[AtxHeaderLine] = """#+""".r ~ rest ^^ {
+ case prefix ~ payload => new AtxHeaderLine(prefix, payload)
+ }
+
+ /** Parses a horizontal rule.
+ */
+ val ruler:Parser[MarkdownLine] = """ {0,3}(((-[ \t]*){3,})|((\*[ \t]*){3,}))$""".r ^^ { new RulerLine(_) }
+
+ /** Matches a line starting with up to three spaces, a '>' and an optional whitespace.
+ * (i.e.: the start or continuation of a block quote.)
+ */
+ val blockquoteLine:Parser[BlockQuoteLine] = """ {0,3}\>( )?""".r ~ rest ^^ {
+ case prefix ~ payload => new BlockQuoteLine(prefix,payload)
+ }
+
+
+ /** A line that starts an unordered list item.
+     * Matches a line starting with up to three spaces followed by an asterisk, plus or dash, a space, and any whitespace.
+ */
+ val uItemStartLine:Parser[UItemStartLine] = (""" {0,3}[\*\+-] [\t\v ]*""".r) ~ rest ^^ {
+ case prefix ~ payload => new UItemStartLine(prefix, payload)
+ }
+
+
+ /** A line that starts an ordered list item.
+ * Matches a line starting with up to three spaces followed by a number, a dot and a space, and any whitespace
+ */
+ val oItemStartLine:Parser[OItemStartLine] = (""" {0,3}[0-9]+\. [\t\v ]*""".r) ~ rest ^^ {
+ case prefix ~ payload => new OItemStartLine(prefix, payload)
+ }
+
+ /** Accepts an empty line. (A line that consists only of optional whitespace or the empty string.)
+ */
+ val emptyLine:Parser[MarkdownLine] = """([ \t]*)$""".r ^^ {new EmptyLine(_)}
+
+ /** Matches a code example line: any line starting with four spaces or a tab.
+ */
+ val codeLine:Parser[CodeLine] = (" " | "\t") ~ rest ^^ {
+ case prefix ~ payload => new CodeLine(prefix, payload)
+ }
+
+
+ /** Matches any line. Only called when all other line parsers have failed.
+     * Makes sure line tokenizing does not fail and we do not lose any lines on the way.
+ */
+ val otherLine:Parser[OtherLine] = rest ^^ {new OtherLine(_)}
+
+ ///////////////////////////////////////////////////////////////
+ // combined parsers for faster tokenizing based on lookahead //
+ ///////////////////////////////////////////////////////////////
+ /** First tries for a setext header level 2, then for a ruler.
+ */
+ val setext2OrRuler:Parser[MarkdownLine] = setextHeader2 | ruler
+ /** First tries for a ruler, then for an unordered list item start.
+ */
+ val rulerOrUItem:Parser[MarkdownLine] = ruler | uItemStartLine
+ /** First tries if the line is empty, if not tries for a code line.
+ */
+ val emptyOrCode:Parser[MarkdownLine] = emptyLine | codeLine
+}
+
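The line parsers classify single physical lines; together with the generic apply from BaseParsers they can be tried out line by line. A sketch, not part of the commit (object name invented):

    // Sketch only: classifying individual lines.
    object LineParsersExample extends LineParsers {
        def main(args: Array[String]) {
            val h = apply(atxHeader, "## A Header ##")
            println("level " + h.headerLevel + ": " + h.trimHashes) // level 2: A Header
            println(apply(uItemStartLine, "* an item").payload)     // an item
        }
    }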
227 src/main/scala/eu/henkelmann/actuarius/LineTokenizer.scala
@@ -0,0 +1,227 @@
+package eu.henkelmann.actuarius
+
+import scala.util.parsing.combinator.Parsers
+import scala.collection.mutable.{HashMap, ArrayBuffer}
+import scala.util.parsing.input.{Position, Reader}
+import scala.xml
+
+/**
+ * A Reader for reading whole Strings as tokens.
+ * Used by the Tokenizer to parse whole lines as one Element.
+ */
+case class LineReader private (val lines:Seq[String],
+ val lineCount:Int)
+ extends Reader[String] {
+ /**should never be used anywhere, just a string that should stick out for better debugging*/
+ private def eofLine = "EOF"
+ def this(ls:Seq[String]) = this(ls, 1)
+ def first = if (lines.isEmpty) eofLine else lines.head
+ def rest = if (lines.isEmpty) this else new LineReader(lines.tail, lineCount + 1)
+ def atEnd = lines.isEmpty
+ def pos = new Position {
+ def line = lineCount
+ def column = 1
+ protected def lineContents = first
+ }
+}
+
+/**
+ * Chops the input into lines and turns those lines into line tokens.
+ * Also takes care of preprocessing link definitions and xml blocks.
+ */
+class LineTokenizer() extends Parsers {
+ object lineParsers extends LineParsers
+
+ /**we munch whole lines (OM NOM NOM)
+ */
+ type Elem = String
+
+ /** Determines if xml blocks may be included verbatim.
+ * If true, they are passed through, else they are escaped and turned into paragraphs
+ */
+ def allowXmlBlocks = true
+
+ /**
+ * Returns a parser based on the given line parser.
+ * The resulting parser succeeds if the given line parser consumes the whole String.
+ */
+ def p[T](parser:lineParsers.Parser[T]):Parser[T] = Parser{in =>
+ if (in.atEnd) {
+ Failure("End of Input.", in)
+ } else {
+ lineParsers.parseAll(parser, in.first) match {
+ case lineParsers.Success(t, _) => Success(t, in.rest)
+ case n:lineParsers.NoSuccess => Failure(n.msg, in)
+ }
+ }
+ }
+
+ /** Returns the first char in the given string or a newline if the string is empty.
+ * This is done to speed up header parsing. Used to speed up line tokenizing substantially
+ * by using the first char in a line as lookahead for which parsers to even try.
+ */
+ def firstChar(line:String):Char = {
+ if (line.length == 0) '\n' else line.charAt(0)
+ }
+
+ /**Finds the char in the given line that is the best indication of what kind of markdown line this is.
+ * The “special” Markdown lines all start with up to three spaces. Those are skipped if present.
+     * The first char after those (up to) three spaces or a newline is returned.
+ */
+ def indicatorChar(line:String):Char = {
+ var i = 0
+ //skip the first three spaces, if present
+ while (i < 3 && i < line.length && line.charAt(i) == ' ') i += 1
+ //return the next char after the spaces or a newline if there are no more
+ if (i==line.length) '\n'
+ else line.charAt(i)
+ }
+
+ ////////////////////////
+ // Link definitions //
+ ////////////////////////
+
+    /** Tries to parse a link definition title from the next line if necessary.
+ * The passed tuple is the result from a previous parser and used to decide how to continue parsing.
+ */
+ def maybeUrlInNextLine(prev:(LinkDefinitionStart, Option[String])):Parser[LinkDefinition] = prev match {
+ case (lds, Some(title)) => success(lds.toLinkDefinition(Some(title)))
+ case (lds, None) => Parser {in =>
+ if (in.atEnd) {
+ Success(lds.toLinkDefinition(None), in)
+ } else {
+ lineParsers.parseAll(lineParsers.linkDefinitionTitle, in.first) match {
+ case lineParsers.Success(title, _) => Success(lds.toLinkDefinition(Some(title)), in.rest)
+ case _ => Success(lds.toLinkDefinition(None), in)
+ }
+ }
+ }
+ }
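+
+ // Sketch of the case this handles, assuming standard Markdown syntax: a link definition whose
+ // title sits on the following line, e.g. the two input lines
+ //   [id]: http://example.com
+ //       "Optional Title"
+ // are combined by linkDefinition below into a single LinkDefinition carrying that title.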
+
+ /**
+ * Parses a link definition.
+ */
+ def linkDefinition:Parser[LinkDefinition] = p(lineParsers.linkDefinitionStart) into(maybeUrlInNextLine)
+
+ /////////////////
+ // XML blocks //
+ /////////////////
+ /** The start of a verbatim XML chunk: any line starting directly with an XML element
+ */
+ def xmlChunkStart = p(lineParsers.xmlBlockStartLine)
+
+ /** Parses any line that does not start with a closing XML element.
+ */
+ def notXmlChunkEnd = p(lineParsers.notXmlBlockEndLine)
+
+ /** Parses a line beginning with a closing XML tag.
+ */
+ def xmlChunkEnd = p(lineParsers.xmlBlockEndLine)
+
+ /** Very dumb parser for XML chunks.
+ */
+ def xmlChunk = xmlChunkStart ~ (notXmlChunkEnd*) ~ xmlChunkEnd ^^ {
+ case s ~ ms ~ e => new XmlChunk(s + "\n" + ms.mkString("\n") + "\n" + e + "\n")
+ }
+
+ /** Parses a single Markdown line. Succeeds on every line (a wrong first-char guess falls
+ * back to otherLine); it only fails at the end of the input.
+ */
+ def lineToken = Parser{ in =>
+ if (in.atEnd) {
+ Failure("End of Input.", in)
+ } else {
+ val line = in.first
+ (firstChar(line), indicatorChar(line)) match {
+ case ('=', _) => p(lineParsers.setextHeader1)(in)
+ case ('-', _) => p(lineParsers.setext2OrRuler)(in)
+ case ('#', _) => p(lineParsers.atxHeader)(in)
+ case (_, '-') => p(lineParsers.ruler)(in)
+ case (_, '>') => p(lineParsers.blockquoteLine)(in)
+ case (_, '*') => p(lineParsers.rulerOrUItem)(in)
+ case (_, n) if (n >= '0' && n <= '9') => p(lineParsers.oItemStartLine)(in)
+ case (_, ' ') => p(lineParsers.emptyOrCode)(in)
+ case (_, '\t')=> p(lineParsers.emptyOrCode)(in)
+ case (_, '\n') => p(lineParsers.emptyLine)(in)
+ case _ => p(lineParsers.otherLine)(in)
+ }
+ }
+ } | p(lineParsers.otherLine) //this makes sure every line is consumed, even if our guess was no good
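+
+ // Sketch of the dispatch above: a line starting with '=' or '-' is tried as a setext header
+ // (a '-' line may also be a ruler), '#' as an ATX header; otherwise the indicator char decides:
+ // '>' blockquote, '-' ruler, '*' ruler or unordered item, a digit an ordered item, space or tab
+ // an empty or code line, '\n' an empty line. A wrong guess falls back to otherLine.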
+
+ /** Parses link definitions and verbatim xml blocks
+ */
+ def preprocessToken = Parser{ in =>
+ if (in.atEnd) {
+ Failure("End of Input.", in)
+ } else {
+ val line = in.first
+ (firstChar(line), indicatorChar(line)) match {
+ //link definitions have absolute precedence
+ case (_, '[') => linkDefinition(in)
+ //then filter out xml blocks if allowed
+ case ('<', _) if (allowXmlBlocks) => xmlChunk(in)
+ //no token for preprocessing
+ case _ => Failure("No preprocessing token.", in)
+ }
+ }
+ }
+
+ /** Parses tokens that may occur inside a block. Works like the normal token parser except that
+ * it does not check for link definitions and verbatim XML.
+ */
+ def innerTokens(lookup:Map[String, LinkDefinition]):Parser[MarkdownLineReader] = phrase(lineToken *) ^^ {
+ case ts => new MarkdownLineReader(ts, lookup)
+ }
+
+ /** Parses first level line tokens, i.e. Markdown lines, XML chunks and link definitions.
+ */
+ def tokens:Parser[MarkdownLineReader] = phrase((preprocessToken | lineToken) *) ^^ { case ts =>
+ val lines = new ArrayBuffer[MarkdownLine]()
+ val lookup = new HashMap[String, LinkDefinition]()
+ for (t <- ts) { t match {
+ case ld:LinkDefinition => lookup(ld.id) = ld
+ case ml:MarkdownLine => lines.append(ml)
+ } }
+ new MarkdownLineReader(lines.toList, lookup.toMap)
+ }
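+
+ // Rough sketch of the effect, assuming the parsers above: after preprocessing, link definitions
+ // are no longer lines but lookup entries, e.g. tokenizing the two lines
+ //   [id]: http://example.com
+ //   Some text
+ // yields a MarkdownLineReader over a single OtherLine plus a lookup map containing "id".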
+
+ /** Simple preprocessing: split the input at each newline. These whole lines are then fed to
+ * the actual Tokenizer.
+ */
+ def splitLines(s:String):List[String] = {
+ def chopWindoze(line:String) = {
+ if (line.endsWith("\r")) {
+ line.substring(0, line.length-1)
+ } else {
+ line
+ }
+ }
+
+ s.split('\n').map(chopWindoze(_)).toList
+ }
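+
+ // A sketch of the behavior, assuming the definition above (Windows line endings are chopped,
+ // a trailing newline produces no empty last line):
+ //   splitLines("foo\r\nbar\nbaz") == List("foo", "bar", "baz")
+ //   splitLines("foo\n")           == List("foo")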
+
+ /** Turns a list of inner lines (the payloads of the lines making up the block)
+ * into line tokens. Does not check for XML chunks or link definitions.
+ */
+ def innerTokenize(lines:List[String], lookup:Map[String, LinkDefinition])=
+ innerTokens(lookup)(new LineReader(lines)) match {
+ case Success(reader, _) => reader
+ case n:NoSuccess =>
+ throw new IllegalStateException("Inner line Tokenizing failed. This is a bug. Message was: " + n.msg)
+ }
+
+ /** Tokenizes a whole Markdown document.
+ */
+ def tokenize(s:String):MarkdownLineReader = tokenize(splitLines(s))
+
+ /** Tokenizes a preprocessed Markdown document.
+ */
+ def tokenize(lines:List[String]):MarkdownLineReader = tokenize(new LineReader(lines))
+
+ /** Tokenizes preprocessed lines read from a line reader.
+ */
+ def tokenize(lines:Reader[String]):MarkdownLineReader = tokens(lines) match {
+ case Success(reader, _) => reader
+ case n:NoSuccess =>
+ throw new IllegalStateException("Tokenizing failed. This is a bug. Message was: " + n.msg)
+ }
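+
+ // Typical end-to-end use, as a sketch: tokenize a whole document into a MarkdownLineReader
+ // that the block parsers then consume (this is what Transformer.apply does):
+ //   val reader = new LineTokenizer().tokenize("# Title\n\nSome *text*\n")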
+}
118 src/main/scala/eu/henkelmann/actuarius/TimeTest.scala
@@ -0,0 +1,118 @@
+package eu.henkelmann.actuarius
+
+import java.io.{InputStreamReader, FileInputStream, StringWriter}
+
+/**
+ * Quick and dirty test for measuring the running time of this parser.
+ * Contains hardcoded file paths; just ignore this, it will be removed soon.
+ */
+
+trait TimedTransformer {
+
+ /**
+ * Override this method to return a custom decorator if you want modified output.
+ */
+ def deco():Decorator = Decorator
+
+ private object lineTokenizer extends LineTokenizer {
+ override def allowXmlBlocks() = TimedTransformer.this.deco().allowVerbatimXml()
+ }
+ private object blockParser extends BlockParsers {
+ override def deco() = TimedTransformer.this.deco()
+ }
+
+ /**
+ * This is the method that turns markdown source into xhtml.
+ */
+ def apply(s:String) = {
+
+ //first, run the input through the line tokenizer
+ val (ms1, lineReader:MarkdownLineReader) = TimeTest.executionTime(()=>lineTokenizer.tokenize(s))
+
+ //then, run it through the block parser
+ val (ms2, result) = TimeTest.executionTime(()=>blockParser(lineReader))
+ println("lines=" + ms1 + ", blocks=" + ms2)
+ result
+ }
+}
+
+
+
+object TimeTest {
+ private object actuariusProcessor extends TimedTransformer()
+
+ private def readFile(path:String):String = {
+ //read from system input stream
+ val reader = new InputStreamReader(new FileInputStream(path))
+ val writer = new StringWriter()
+ val buffer = new Array[Char](1024)
+ var read = reader.read(buffer)
+ while (read != -1) {
+ writer.write(buffer, 0, read)
+ read = reader.read(buffer)
+ }
+ //turn read input into a string
+ writer.toString
+ }
+
+ def executionTime[T](f:(()=>T)):(Long, T) = {
+ val start = System.currentTimeMillis
+ val t = f()
+ val end = System.currentTimeMillis
+ (end - start, t)
+ }
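+
+ // Usage sketch: time an arbitrary computation and get both the elapsed milliseconds and the result.
+ //   val (millis, sum) = executionTime(() => (1 to 1000000).sum)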
+
+ private def runActuarius(markdown:String, iterations:Int) {
+ for (i <- 0 until iterations) actuariusProcessor(markdown)
+ }
+
+
+ def testRun(markdown:String, iterations:Int) {
+ println("Running Actuarius " + iterations + " times...")
+ println("... took " + (executionTime(() => runActuarius(markdown, iterations)))._1 + "ms")
+ }
+
+ object testParser extends BaseParsers {
+ //def ws1:Parser[String] = """( |\t|\v)+""".r
+ def ws2:Parser[String] = rep1(elem(' ') | elem('\t') | elem('\u000B')) ^^ {_.mkString}
+
+ def runParser(s:String, p:Parser[String], iterations:Int) {
+ for (i <- 0 until iterations) {
+ apply(p, s)
+ }
+ }
+ }
+
+ def runActuarius = {
+ val markdown = readFile("/home/chris/sbt_projects/markdown_race/test.txt").mkString*100
+ val iterations = 10
+ println("==== First run to warm up the VM: ====")
+ testRun(markdown, iterations)
+ println("==== Second run, JIT compiler should be done now: ====")
+ testRun(markdown, iterations)
+ }
+
+ def runWs = {
+ val wsString = " " * 1000
+ val iterations = 100000
+ println("Running ws...")
+ println("...took " + executionTime (() => testParser.runParser(wsString, testParser.ws, iterations))._1 + "ms")
+ //println("Running ws1...")
+ //println("...took " + executionTime (() => testParser.runParser(wsString, testParser.ws, iterations)))
+ println("Running ws2...")
+ println("...took " + executionTime (() => testParser.runParser(wsString, testParser.ws2, iterations))._1 + "ms")
+
+ }
+
+ def main(args:Array[String]) {
+ /*
+ val markdown = readFile("/home/chris/sbt_projects/markdown_race/test.txt").mkString*100
+ val iterations = 10
+ println("==== First run to warm up the VM: ====")
+ testRun(markdown, iterations)
+ println("==== Second run, JIT compiler should be done now: ====")
+ testRun(markdown, iterations)*/
+ //runWs
+ runActuarius
+ }
+}
72 src/main/scala/eu/henkelmann/actuarius/Transformer.scala
@@ -0,0 +1,72 @@
+package eu.henkelmann.actuarius
+
+import java.io.{InputStreamReader, StringWriter}
+
+/**
+ * This is the Transformer that uses the other parsers to transform markdown into xhtml.
+ * Mix this trait in if you want more control over the output (like switching verbatim xml on/off or using
+ * different opening/closing tags for the output).
+ */
+trait Transformer {
+
+ /**
+ * Override this method to return a custom decorator if you want modified output.
+ */
+ def deco():Decorator = Decorator
+
+ private object lineTokenizer extends LineTokenizer {
+ override def allowXmlBlocks() = Transformer.this.deco().allowVerbatimXml()
+ }
+ private object blockParser extends BlockParsers {
+ override def deco() = Transformer.this.deco()
+ }
+
+ /**
+ * This is the method that turns markdown source into xhtml.
+ */
+ def apply(s:String) = {
+ //first, run the input through the line tokenizer
+ val lineReader = lineTokenizer.tokenize(s)
+ //then, run it through the block parser
+ blockParser(lineReader)
+ }
+}
+
+/**
+ * Simple standalone Markdown transformer.
+ * Use this if you simply want to transform a block of Markdown without any special options.
+ * {{{
+ * val input:String = ...
+ * val xhtml:String = new ActuariusTransformer()(input)
+ * }}}
+ * Note that Actuarius, and hence this class, is not thread-safe.
+ * This is because it is based on Scala Parser Combinators, which are not thread-safe :(
+ * (though they should be IMHO)
+ */
+class ActuariusTransformer extends Transformer
+
+
+/**
+ * Contains a main method that simply reads everything from stdin, parses it as Markdown and
+ * prints the result to stdout.
+ */
+object ActuariusApp extends Transformer {
+
+
+ def main(args:Array[String]) = {
+ //read from system input stream
+ val reader = new InputStreamReader(System.in)
+ val writer = new StringWriter()
+ val buffer = new Array[Char](1024)
+ var read = reader.read(buffer)
+ while (read != -1) {
+ writer.write(buffer, 0, read)
+ read = reader.read(buffer)
+ }
+ //turn read input into a string
+ val input = writer.toString
+ //run that string through the transformer trait's apply method
+ val output = apply(input)
+ //print result to stdout
+ print(output)
+ }
+}
52 src/test/scala/eu/henkelmann/actuarius/BaseParsersTest.scala
@@ -0,0 +1,52 @@
+package eu.henkelmann.actuarius
+
+import org.scalatest.FlatSpec
+import org.scalatest.matchers.ShouldMatchers
+import collection.SortedMap
+
+/**
+ * Tests basic parsers that are used by the more complex parsing steps.
+ */
+
+class BaseParsersTest extends FlatSpec with ShouldMatchers with BaseParsers{
+
+ "The BaseParsers" should "parse a newline" in {
+ val p = nl
+ apply(p, "\n") should equal ("\n")
+ evaluating(apply(p, "\r\n")) should produce[IllegalArgumentException]
+ evaluating(apply(p, " \n")) should produce[IllegalArgumentException]
+ }
+
+ it should "parse whitespace" in {
+ val p = ws
+ apply(p, " ") should equal (" ")
+ apply(p, "\t") should equal ("\t")
+ apply(p, " ") should equal (" ")
+ apply(p, "\t\t") should equal ("\t\t")
+ apply(p, " \t \t ") should equal (" \t \t ")
+ //we want newlines to be treated differently from other ws
+ evaluating (apply(p, "\n")) should produce[IllegalArgumentException]
+ }
+
+ it should "be able to look behind" in {
+ apply (((elem('a') ~ lookbehind(Set('a')) ~ elem('b'))^^{case a~lb~b=>a+""+b}), "ab") should equal ("ab")
+ evaluating {apply (((elem('a') ~ lookbehind(Set('b')) ~ elem('b'))^^{case a~b=>a+""+b}), "ab")} should produce[IllegalArgumentException]
+
+ apply( (elem('a') ~ not(lookbehind(Set(' ', '\t', '\n'))) ~ '*' ), "a*" )
+
+ }
+
+ it should "parse chars in ranges" in {
+ val p = ranges(SortedMap('A' -> 'Z', '0' -> '9'))
+ apply(p, "B") should equal ('B')
+ apply(p, "A") should equal ('A')
+ apply(p, "Z") should equal ('Z')
+ apply(p, "5") should equal ('5')
+ apply(p, "0") should equal ('0')
+ apply(p, "9") should equal ('9')
+ evaluating (apply(p, "a")) should produce[IllegalArgumentException]
+ evaluating (apply(p, "z")) should produce[IllegalArgumentException]
+ evaluating (apply(p, "<")) should produce[IllegalArgumentException]
+ }
+
+}
36 src/test/scala/eu/henkelmann/actuarius/BlockParsersTest.scala
@@ -0,0 +1,36 @@
+package eu.henkelmann.actuarius
+
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+import org.scalatest.matchers.ShouldMatchers
+import org.scalatest.FlatSpec
+import xml.{Group, NodeSeq}
+
+/**
+ * Tests the parsing on block level.
+ */
+@RunWith(classOf[JUnitRunner])
+class BlockParsersTest extends FlatSpec with ShouldMatchers with BlockParsers{
+
+ "The BlockParsers" should "parse optional empty lines" in {
+ val p = optEmptyLines
+ val el = new EmptyLine(" \n")
+ apply(p, Nil) should equal (Nil)
+ apply(p, List(el)) should equal (List(el))
+ apply(p, List(el, el)) should equal (List(el, el))
+ }
+
+ it should "accept empty documents" in {
+ val p = markdown
+ val el = new EmptyLine(" \n")
+ apply(p, Nil) should equal (Nil)
+ apply(p, List(el)) should equal (Nil)
+ apply(p, List(el, el)) should equal (Nil)
+ }
+
+ it should "detect line types" in {
+ val p = line(classOf[CodeLine])
+ apply(p, List(new CodeLine(" ", "code"))) should equal (new CodeLine(" ", "code"))
+ evaluating(apply(p, List(new OtherLine("foo")))) should produce[IllegalArgumentException]
+ }
+}
240 src/test/scala/eu/henkelmann/actuarius/InlineParsersTest.scala
@@ -0,0 +1,240 @@
+package eu.henkelmann.actuarius
+
+import org.scalatest.FlatSpec
+import org.scalatest.matchers.ShouldMatchers
+
+/**
+ * Tests inline parsing, i.e. emphasis, strong text, links, escapes, etc.
+ */
+
+class InlineParsersTest extends FlatSpec with ShouldMatchers with InlineParsers{
+
+ ///////////////////////////////////////////////////////////////
+ // Inline parsing Tests //
+ ///////////////////////////////////////////////////////////////
+ def runSucceedingParsingTests(p:Parser[String], l:List[(String, String)]) {
+ for ((a, b) <- l) {
+ try {
+ apply(p, a) should equal (b)
+ } catch {
+ case e => println("Input causing the failure was: '" + a + "'."); throw e;
+ }
+ }
+ }
+
+ def runExceptionParsingTests(p:Parser[String], l:List[String]) {
+ for (s <- l) evaluating{apply(p, s)} should produce[IllegalArgumentException]
+ }
+
+ val italicTests:List[(String, String)] = List(
+ ("*italic*", "<em>italic</em>"),
+ ("*italic * italic*", "<em>italic * italic</em>"),
+ ("_italic_", "<em>italic</em>"))
+
+ val boldTests = List(
+ ("**bold**", "<strong>bold</strong>"),
+ ("**bold * bold**", "<strong>bold * bold</strong>"),
+ ("__bold__", "<strong>bold</strong>"))
+
+ val codeTests = List(
+ ("`code`", "<code>code</code>"),
+ ("``code``", "<code>code</code>"),
+ ("` *italic* `", "<code> *italic* </code>"),
+ ("`code\ncode`", "<code>code\ncode</code>"),
+ ("``code ` code``", "<code>code ` code</code>")
+ )
+
+ val linkTests = List(
+ ("""[link text](http://example.com "link title")""",
+ """<a href="http://example.com" title="link title">link text</a>"""),
+ ("""[link text](http://example.com )""",
+ """<a href="http://example.com">link text</a>"""),
+ ("""[link text]( http://example.com "link title" )""",
+ """<a href="http://example.com" title="link title">link text</a>"""),
+ ("""[link text]( http://example.com "li)nk" title" )""",
+ """<a href="http://example.com" title="li)nk&quot; title">link text</a>""")
+ )
+
+ val fastLinkTests = List(
+ ("""<http://www.example.com?foo=a&bar=b\*>""",
+ """<a href="http://www.example.com?foo=a&amp;bar=b*">http://www.example.com?foo=a&amp;bar=b*</a>""")
+ )
+
+
+ val imageTests = List(
+ ("""![alt text](/src/img.png "img title")""",
+ """<img src="/src/img.png" alt="alt text" title="img title" />"""),
+ ("""![alt text](/src/img.png )""",
+ """<img src="/src/img.png" alt="alt text" />"""),
+ ("""![alt text]( /src/img.png "img title" )""",
+ """<img src="/src/img.png" alt="alt text" title="img title" />"""),
+ ("""![alt text]( /src/img.png "i)mg" title" )""",
+ """<img src="/src/img.png" alt="alt text" title="i)mg&quot; title" />""")
+ )
+
+ val brTests = List(
+ (" \n", "<br />\n")
+ )
+
+ val xmlNameTests = List(
+ ("foo", "foo"),
+ ("foo_bar", "foo_bar"),
+ ("a", "a")
+ )
+
+ val xmlNameExTests = List(
+ "",
+ "foo/bar",
+ "foo<bar",
+ "foo>bar",
+ "foo\"bar",
+ "foo\\bar",
+ "foo bar"
+ )
+
+ val xmlStartTagTests = List(
+ ("<foo>", "<foo>"),
+ ("""<foo attr="bar">""", """<foo attr="bar">"""),
+ ("""<foo attr="bar" attr2="baz">""", """<foo attr="bar" attr2="baz">"""),
+ ("""<a href="http://www.example.com?p1=a&p2=b">""", """<a href="http://www.example.com?p1=a&amp;p2=b">""")
+ )
+
+ val xmlEndTagTests = List(
+ ("</foo>", "</foo>"),
+ ("</a>", "</a>")
+ )
+
+ val xmlInlineTests = List(
+ ("""hallo <foo attr="&'<>">*italic*</foo> ballo""",
+ """hallo <foo attr="&amp;&apos;&lt;&gt;"><em>italic</em></foo> ballo"""),
+ ("""hallo <foo attr="&'<>"/>*italic*<foo/> ballo""",
+ """hallo <foo attr="&amp;&apos;&lt;&gt;"/><em>italic</em><foo/> ballo""")
+ )
+
+ val mixedTests = List(
+ ("*italic* **bold** *italic*", "<em>italic</em> <strong>bold</strong> <em>italic</em>"),
+ ("*italic***bold***italic*", "<em>italic<strong>*bold</strong></em>italic*"),
+ ("***foo***", "<strong><em>foo</em></strong>")
+ )
+
+
+ /**
+ * These should pass through the inline replacement unchanged and can be placed between "real tests"
+ * to simulate intermediate text.
+ */
+ val dummyTests = List(
+ ("lorem ipsum ", "lorem ipsum "),
+ (" lorem ipsum", " lorem ipsum"),
+ (" lorem \n ipsum ", " lorem \n ipsum ")
+ )
+
+
+ val allInlineTests = italicTests ++ boldTests ++ codeTests ++ linkTests ++ fastLinkTests ++ imageTests ++ brTests ++
+ xmlStartTagTests ++ xmlEndTagTests ++ xmlInlineTests ++ dummyTests
+
+ it should "create italic text" in {
+ runSucceedingParsingTests(emAsterisk(new InlineContext())|emUnderscore(new InlineContext()) , italicTests)
+ }
+
+ it should "create bold text" in {
+ runSucceedingParsingTests(strongAsterisk(new InlineContext())|strongUnderscore(new InlineContext()), boldTests)
+ }
+
+ it should "create inline code" in {
+ runSucceedingParsingTests(code, codeTests)
+ }
+
+ it should "create links" in {
+ runSucceedingParsingTests(link(new InlineContext()), linkTests)
+ }
+
+ it should "create fast links" in {
+ runSucceedingParsingTests(fastLink(new InlineContext()), fastLinkTests)
+ val p = fastLink(new InlineContext())
+ evaluating(apply(p, "<this is not a fast link<span>")) should produce[IllegalArgumentException]
+
+ }
+
+ it should "create images" in {
+ runSucceedingParsingTests((elem('!')~>directImg), imageTests)
+ }
+
+ it should "create line breaks" in {
+ runSucceedingParsingTests(br, brTests)
+ }
+
+ it should "parse simplified xml identifiers" in {
+ runSucceedingParsingTests(xmlName, xmlNameTests)
+ runExceptionParsingTests(xmlName, xmlNameExTests)
+ }
+
+ it should "parse opening xml tags and escape their attribute vals" in {
+ runSucceedingParsingTests(xmlStartOrEmptyTag, xmlStartTagTests)
+ }
+
+ it should "parse closing xml tags" in {
+ runSucceedingParsingTests(xmlEndTag, xmlEndTagTests)
+ }
+
+ it should "allow inline xml and escape its parameters" in {
+ runSucceedingParsingTests(inline(Map()), xmlInlineTests)
+ }
+
+ it should "parse mixed inline cases" in {
+ runSucceedingParsingTests(inline(Map()), mixedTests)
+ }
+
+ val ld1 = new LinkDefinition("id", "http://www.example.com", Some("Title"))
+ val ld2 = new LinkDefinition("id 2", "http://other.example.com", Some("Title 2"))
+ val ld3 = new LinkDefinition("id 3", "http://none.example.com", None)
+ val map = Map(ld1.id -> ld1, ld2.id -> ld2, ld3.id -> ld3)
+ val ctx = new InlineContext(map)
+
+ it should "resolve references" in {
+ val p = ref(ctx)
+ apply(p, "[text][id]") should equal ((ld1, "text"))
+ apply(p, "[text] [id]") should equal ((ld1, "text"))
+ apply(p, "[id][]") should equal ((ld1, "id"))
+ apply(p, "[id] []") should equal ((ld1, "id"))
+ apply(p, "[id]") should equal ((ld1, "id"))
+ apply(p, "[Id]") should equal ((ld1, "Id"))
+ }
+
+ it should "resolve reference links" in {
+ val p = inline(map)
+ apply(p, "[text][id]") should equal ("""<a href="http://www.example.com" title="Title">text</a>""")
+ apply(p, "[text] [id]") should equal ("""<a href="http://www.example.com" title="Title">text</a>""")
+ apply(p, "[id][]") should equal ("""<a href="http://www.example.com" title="Title">id</a>""")
+ apply(p, "[id] []") should equal ("""<a href="http://www.example.com" title="Title">id</a>""")
+ apply(p, "[id]") should equal ("""<a href="http://www.example.com" title="Title">id</a>""")
+ apply(p, "[Id]") should equal ("""<a href="http://www.example.com" title="Title">Id</a>""")