).
+ */
+ "doAutoLinks" => 30,
+ "encodeAmpsAndAngles" => 40,
+
+ "doItalicsAndBold" => 50,
+ "doHardBreaks" => 60,
+ );
+
+ protected $em_relist = array(
+ '' => '(?:(? '(?<=\S|^)(? '(?<=\S|^)(? '(?:(? '(?<=\S|^)(? '(?<=\S|^)(? '(?:(? '(?<=\S|^)(? '(?<=\S|^)(?_config = $config;
+ $this->suffix = ($this->_config['tab_width'] == 'html') ? '>' : ' />';
+ $this->tab_width = $this->_config['tab_width'];
+
+ $this->init_detab();
+ $this->prepare_italics_and_bold();
+
+ $this->nested_brackets_re =
+ str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
+ str_repeat('\])*', $this->nested_brackets_depth);
+
+ $this->nested_url_parenthesis_re =
+ str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
+ str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);
+
+ $this->escape_chars_re = '['.preg_quote($this->escape_chars).']';
+
+ # Sort document, block, and span gamut in ascendent priority order.
+ asort($this->document_gamut);
+ asort($this->block_gamut);
+ asort($this->span_gamut);
+ }
+
+ /**
+ * Creates a singleton of a Kohana Markdown parser. If no parser is supplied
+ * the __default__ markdown parser is used.
+ *
+ * // Create an instance of the default parser
+ * $default_parser = Markdown::instance();
+ *
+ * // Create an instance of a parser
+ * $foo_parser = Markdown::instance('foo');
+ *
+ * // Access an instantiated parser directly
+ * $foo_parser = Markdown::$instances['default'];
+ *
+ * @param string the name of the markdown parser to use [Optional]
+ * @return Kohana_Markdown
+ * @throws Kohana_Markdown_Exception
+ */
+ public static function instance($parser = NULL)
+ {
+ /* If there is no parser supplied, use the default parser */
+ if ($parser === NULL) {
+ $parser = Markdown::$default;
+ }
+
+ /* Return the current parser if its already initiated */
+ if (isset(Markdown::$instances[$parser])) {
+ return Markdown::$instances[$parser];
+ }
+
+ $config = Kohana::config('markdown');
+ if (!$config->offsetExists($parser)) {
+ throw new Kohana_Markdown_Exception('Failed to load Kohana Markdown parser: :parser', array(':parser' => $parser));
+ }
+ $config = $config->get($parser);
+
+ /* Create a new markdown instance */
+ Markdown::$instances[$parser] = new Kohana_Markdown($config);
+
+ /* Return the instance */
+ return Markdown::$instances[$parser];
+ }
+
+ /**
+ * Overload the __clone() method to prevent cloning
+ *
+ * @return void
+ * @throws Kohana_Markdown_Exception
+ */
+ public function __clone()
+ {
+ throw new Kohana_Markdown_Exception('Cloning of Kohana_Markdown objects is forbidden');
+ }
+
+ /**
+ * Called before the transformation process starts to setup parser states.
+ *
+ * @return void
+ */
+ protected function before()
+ {
+ /* Clear out the global hashes. */
+ $this->urls = $this->predef_urls;
+ $this->titles = $this->predef_titles;
+ $this->html_hashes = array();
+
+ $in_anchor = false;
+ }
+
+ /**
+ * Called after the transformation process to clear any variable which may
+ * be taking up memory unnecessarly.
+ *
+ * @return void
+ */
+ protected function after()
+ {
+ $this->urls = array();
+ $this->titles = array();
+ $this->html_hashes = array();
+ }
+
+ /**
+ * Main function. Performs some preprocessing on the input text and pass it
+ * through the document gamut.
+ *
+ * @param string The markdown content to be transformed into (x)html.
+ * @return string The transformed (x)html.
+ */
+ public function transform($text)
+ {
+ /* Initialise the storage arrays */
+ $this->before();
+
+ /* Remove UTF-8 BOM and marker character in input, if present */
+ $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);
+
+ /* Standardize line endings: DOS to Unix and Mac to Unix */
+ $text = preg_replace('{\r\n?}', "\n", $text);
+
+ /* Make sure $text ends with a couple of newlines */
+ $text .= "\n\n";
+
+ /* Convert all tabs to spaces. */
+ $text = $this->detab($text);
+
+ /* Turn block-level HTML blocks into hash entries */
+ $text = $this->hash_HTML_blocks($text);
+
+ /* Strip any lines consisting only of spaces and tabs. This makes any
+ * subsequent regexen easier to write, because we can match consecutive
+ * blank lines with /\n+/ instead of something contorted like /[ ]*\n+/
+ */
+ $text = preg_replace('/^[ ]+$/m', '', $text);
+
+ /* Run document gamut methods */
+ foreach ($this->document_gamut as $method => $priority) {
+ $text = $this->$method($text);
+ }
+
+ /* Cleanup memory */
+ $this->after();
+
+ /* Return the new (x)html content */
+ return $text . "\n";
+ }
+
+ /**
+ * Strips link definitions from text, stores the URLs and titles in hash
+ * references.
+ *
+ * @param string The markdown getting transformed.
+ * @return string The resulting hash references for link definitions.
+ */
+ protected function strip_link_definitions($text)
+ {
+ $less_than_tab = $this->tab_width - 1;
+
+ /* Link defs are in the form: ^[id]: url "optional title" */
+ $text = preg_replace_callback('{
+ ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
+ [ ]*
+ \n? # maybe *one* newline
+ [ ]*
+ (?:
+ <(.+?)> # url = $2
+ |
+ (\S+?) # url = $3
+ )
+ [ ]*
+ \n? # maybe one newline
+ [ ]*
+ (?:
+ (?<=\s) # lookbehind for whitespace
+ ["(]
+ (.*?) # title = $4
+ [")]
+ [ ]*
+ )? # title is optional
+ (?:\n+|\Z)
+ }xm',
+ array(&$this, '_strip_link_definitions_callback'), $text);
+ return $text;
+ }
+
+ protected function _strip_link_definitions_callback($matches)
+ {
+ $link_id = strtolower($matches[1]);
+ $url = $matches[2] == '' ? $matches[3] : $matches[2];
+ $this->urls[$link_id] = $url;
+ $this->titles[$link_id] =& $matches[4];
+ return ''; /* String that will replace the block */
+ }
+
+ /**
+ * Hashify HTML blocks:
+ * We only want to do this for block-level HTML tags, such as headers,
+ * lists, and tables. That's because we still want to wrap s around
+ * "paragraphs" that are wrapped in non-block-level tags, such as anchors,
+ * phrase emphasis, and spans. The list of tags we're looking for is
+ * hard-coded:
+ *
+ * * List "a" is made of tags which can be both inline or block-level.
+ * These will be treated block-level when the start tag is alone on
+ * its line, otherwise they're not matched here and will be taken as
+ * inline later.
+ * * List "b" is made of tags which are always block-level;
+ *
+ * @param string The markdown getting transformed.
+ * @return string The resulting hash references for link definitions.
+ */
+ protected function hash_HTML_blocks($text)
+ {
+ if ($this->no_markup) return $text;
+
+ $less_than_tab = $this->tab_width - 1;
+
+ $block_tags_a_re = 'ins|del';
+ $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
+ 'script|noscript|form|fieldset|iframe|math';
+
+ /* Regular expression for the content of a block tag. */
+ $nested_tags_level = 4;
+ $attr = '
+ (?> # optional tag attributes
+ \s # starts with whitespace
+ (?>
+ [^>"/]+ # text outside quotes
+ |
+ /+(?!>) # slash not followed by ">"
+ |
+ "[^"]*" # text inside double quotes (tolerate ">")
+ |
+ \'[^\']*\' # text inside single quotes (tolerate ">")
+ )*
+ )?
+ ';
+ $content =
+ str_repeat('
+ (?>
+ [^<]+ # content without tag
+ |
+ <\2 # nested opening tag
+ '.$attr.' # attributes
+ (?>
+ />
+ |
+ >', $nested_tags_level). # end of opening tag
+ '.*?'. # last level nested tag content
+ str_repeat('
+ \2\s*> # closing nested tag
+ )
+ |
+ <(?!/\2\s*> # other tags with a different name
+ )
+ )*',
+ $nested_tags_level);
+ $content2 = str_replace('\2', '\3', $content);
+
+ /* First, look for nested blocks, e.g.:
+ *
+ *
+ * tags for inner block must be indented.
+ *
+ *
+ *
+ * The outermost tags must start at the left margin for this to match, and
+ * the inner nested divs must be indented.
+ * We need to do this before the next, more liberal match, because the next
+ * match will start at the first `` and stop at the first `
`.
+ */
+ $text = preg_replace_callback('{(?>
+ (?>
+ (?<=\n\n) # Starting after a blank line
+ | # or
+ \A\n? # the beginning of the doc
+ )
+ ( # save in $1
+
+ # Match from `\n` to `\n`, handling nested tags
+ # in between.
+
+ [ ]{0,'.$less_than_tab.'}
+ <('.$block_tags_b_re.')# start tag = $2
+ '.$attr.'> # attributes followed by > and \n
+ '.$content.' # content, support nesting
+ \2> # the matching end tag
+ [ ]* # trailing spaces/tabs
+ (?=\n+|\Z) # followed by a newline or end of document
+
+ | # Special version for tags of group a.
+
+ [ ]{0,'.$less_than_tab.'}
+ <('.$block_tags_a_re.')# start tag = $3
+ '.$attr.'>[ ]*\n # attributes followed by >
+ '.$content2.' # content, support nesting
+ \3> # the matching end tag
+ [ ]* # trailing spaces/tabs
+ (?=\n+|\Z) # followed by a newline or end of document
+
+ | # Special case just for
. It was easier to make a special
+ # case than to make the other regex more complicated.
+
+ [ ]{0,'.$less_than_tab.'}
+ <(hr) # start tag = $2
+ '.$attr.' # attributes
+ /?> # the matching end tag
+ [ ]*
+ (?=\n{2,}|\Z) # followed by a blank line or end of document
+
+ | # Special case for standalone HTML comments:
+
+ [ ]{0,'.$less_than_tab.'}
+ (?s:
+
+ )
+ [ ]*
+ (?=\n{2,}|\Z) # followed by a blank line or end of document
+
+ | # PHP and ASP-style processor instructions ( and <%)
+
+ [ ]{0,'.$less_than_tab.'}
+ (?s:
+ <([?%]) # $2
+ .*?
+ \2>
+ )
+ [ ]*
+ (?=\n{2,}|\Z) # followed by a blank line or end of document
+
+ )
+ )}Sxmi',
+ array(&$this, '_hash_HTML_blocks_callback'),
+ $text);
+
+ return $text;
+ }
+
+ protected function _hash_HTML_blocks_callback($matches)
+ {
+ $text = $matches[1];
+ $key = $this->hash_block($text);
+ return "\n\n$key\n\n";
+ }
+
+ /**
+ * Called whenever a tag must be hashed when a function insert an atomic
+ * element in the text stream. Passing $text to through this function gives
+ * a unique text-token which will be reverted back when calling unhash.
+ *
+ * The $boundary argument specify what character should be used to surround
+ * the token. By convension, "B" is used for block elements that needs not
+ * to be wrapped into paragraph tags at the end, ":" is used for elements
+ * that are word separators and "X" is used in the general case.
+ *
+ * @param string The markdown getting transformed.
+ * @param string The boundry to use to surround a token.
+ * @return string String that will replace the tag.
+ */
+ protected function hash_part($text, $boundary = 'X')
+ {
+ /* Swap back any tag hash found in $text so we dont have to `unhash`
+ * multiple times at the end.
+ */
+ $text = $this->unhash($text);
+
+ /* Then hash the block. */
+ static $i = 0;
+ $key = "$boundary\x1A" . ++$i . $boundary;
+ $this->html_hashes[$key] = $text;
+ return $key;
+ }
+
+ /**
+ * Shortcut function for hash_part with block-level boundaries.
+ * @param string The markdown getting transformed.
+ * @return string String that will replace the tag.
+ */
+ protected function hash_block($text)
+ {
+ return $this->hash_part($text, 'B');
+ }
+
+ /**
+ * Run block gamut tranformations.
+ *
+ * @param string The markdown getting transformed.
+ * @return string String that will replace the tag.
+ */
+ protected function run_block_gamut($text)
+ {
+ /* We need to escape raw HTML in Markdown source before doing anything
+ * else. This need to be done for each block, and not only at the
+ * begining in the Markdown function since hashed blocks can be part of
+ * list items and could have been indented. Indented blocks would have
+ * been seen as a code block in a previous pass of hash_HTML_blocks.
+ */
+ $text = $this->hash_HTML_blocks($text);
+
+ return $this->run_basic_block_gamut($text);
+ }
+
+ /**
+ * Run block gamut tranformations, without hashing HTML blocks. This is
+ * useful when HTML blocks are known to be already hashed, like in the first
+ * whole-document pass.
+ *
+ * @param string The markdown getting transformed.
+ * @return string String that will replace the tag.
+ */
+ protected function run_basic_block_gamut($text)
+ {
+ foreach ($this->block_gamut as $method => $priority) {
+ $text = $this->$method($text);
+ }
+
+ # Finally form paragraph and restore hashed blocks.
+ $text = $this->formParagraphs($text);
+
+ return $text;
+ }
+
+ protected function doHorizontalRules($text)
+ {
+ # Do Horizontal Rules:
+ return preg_replace(
+ '{
+ ^[ ]{0,3} # Leading space
+ ([-*_]) # $1: First marker
+ (?> # Repeated marker group
+ [ ]{0,2} # Zero, one, or two spaces.
+ \1 # Marker character
+ ){2,} # Group repeated at least twice
+ [ ]* # Tailing spaces
+ $ # End of line.
+ }mx',
+ "\n".$this->hash_block("
suffix")."\n",
+ $text);
+ }
+
+ protected function runSpanGamut($text)
+ {
+ #
+ # Run span gamut tranformations.
+ #
+ foreach ($this->span_gamut as $method => $priority) {
+ $text = $this->$method($text);
+ }
+
+ return $text;
+ }
+
+ protected function doHardBreaks($text)
+ {
+ # Do hard breaks:
+ return preg_replace_callback('/ {2,}\n/',
+ array(&$this, '_doHardBreaks_callback'), $text);
+ }
+ protected function _doHardBreaks_callback($matches)
+ {
+ return $this->hash_part("
suffix\n");
+ }
+
+ function doAnchors($text)
+ {
+ #
+ # Turn Markdown link shortcuts into XHTML tags.
+ #
+ if ($this->in_anchor) return $text;
+ $this->in_anchor = true;
+
+ #
+ # First, handle reference-style links: [link text] [id]
+ #
+ $text = preg_replace_callback('{
+ ( # wrap whole match in $1
+ \[
+ ('.$this->nested_brackets_re.') # link text = $2
+ \]
+
+ [ ]? # one optional space
+ (?:\n[ ]*)? # one optional newline followed by spaces
+
+ \[
+ (.*?) # id = $3
+ \]
+ )
+ }xs',
+ array(&$this, '_doAnchors_reference_callback'), $text);
+
+ #
+ # Next, inline-style links: [link text](url "optional title")
+ #
+ $text = preg_replace_callback('{
+ ( # wrap whole match in $1
+ \[
+ ('.$this->nested_brackets_re.') # link text = $2
+ \]
+ \( # literal paren
+ [ \n]*
+ (?:
+ <(.+?)> # href = $3
+ |
+ ('.$this->nested_url_parenthesis_re.') # href = $4
+ )
+ [ \n]*
+ ( # $5
+ ([\'"]) # quote char = $6
+ (.*?) # Title = $7
+ \6 # matching quote
+ [ \n]* # ignore any spaces/tabs between closing quote and )
+ )? # title is optional
+ \)
+ )
+ }xs',
+ array(&$this, '_doAnchors_inline_callback'), $text);
+
+ #
+ # Last, handle reference-style shortcuts: [link text]
+ # These must come last in case you've also got [link text][1]
+ # or [link text](/foo)
+ #
+ $text = preg_replace_callback('{
+ ( # wrap whole match in $1
+ \[
+ ([^\[\]]+) # link text = $2; can\'t contain [ or ]
+ \]
+ )
+ }xs',
+ array(&$this, '_doAnchors_reference_callback'), $text);
+
+ $this->in_anchor = false;
+ return $text;
+ }
+
+ protected function _doAnchors_reference_callback($matches)
+ {
+ $whole_match = $matches[1];
+ $link_text = $matches[2];
+ $link_id =& $matches[3];
+
+ if ($link_id == "") {
+ # for shortcut links like [this][] or [this].
+ $link_id = $link_text;
+ }
+
+ # lower-case and turn embedded newlines into spaces
+ $link_id = strtolower($link_id);
+ $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
+
+ if (isset($this->urls[$link_id])) {
+ $url = $this->urls[$link_id];
+ $url = $this->encodeAttribute($url);
+
+ $result = "titles[$link_id] ) ) {
+ $title = $this->titles[$link_id];
+ $title = $this->encodeAttribute($title);
+ $result .= " title=\"$title\"";
+ }
+
+ $link_text = $this->runSpanGamut($link_text);
+ $result .= ">$link_text";
+ $result = $this->hash_part($result);
+ }
+ else {
+ $result = $whole_match;
+ }
+ return $result;
+ }
+
+ protected function _doAnchors_inline_callback($matches)
+ {
+ $whole_match = $matches[1];
+ $link_text = $this->runSpanGamut($matches[2]);
+ $url = $matches[3] == '' ? $matches[4] : $matches[3];
+ $title =& $matches[7];
+
+ $url = $this->encodeAttribute($url);
+
+ $result = "encodeAttribute($title);
+ $result .= " title=\"$title\"";
+ }
+
+ $link_text = $this->runSpanGamut($link_text);
+ $result .= ">$link_text";
+
+ return $this->hash_part($result);
+ }
+
+ protected function doImages($text)
+ {
+ #
+ # Turn Markdown image shortcuts into tags.
+ #
+ #
+ # First, handle reference-style labeled images: ![alt text][id]
+ #
+ $text = preg_replace_callback('{
+ ( # wrap whole match in $1
+ !\[
+ ('.$this->nested_brackets_re.') # alt text = $2
+ \]
+
+ [ ]? # one optional space
+ (?:\n[ ]*)? # one optional newline followed by spaces
+
+ \[
+ (.*?) # id = $3
+ \]
+
+ )
+ }xs',
+ array(&$this, '_doImages_reference_callback'), $text);
+
+ #
+ # Next, handle inline images: ![alt text](url "optional title")
+ # Don't forget: encode * and _
+ #
+ $text = preg_replace_callback('{
+ ( # wrap whole match in $1
+ !\[
+ ('.$this->nested_brackets_re.') # alt text = $2
+ \]
+ \s? # One optional whitespace character
+ \( # literal paren
+ [ \n]*
+ (?:
+ <(\S*)> # src url = $3
+ |
+ ('.$this->nested_url_parenthesis_re.') # src url = $4
+ )
+ [ \n]*
+ ( # $5
+ ([\'"]) # quote char = $6
+ (.*?) # title = $7
+ \6 # matching quote
+ [ \n]*
+ )? # title is optional
+ \)
+ )
+ }xs',
+ array(&$this, '_doImages_inline_callback'), $text);
+
+ return $text;
+ }
+
+ protected function _doImages_reference_callback($matches)
+ {
+ $whole_match = $matches[1];
+ $alt_text = $matches[2];
+ $link_id = strtolower($matches[3]);
+
+ if ($link_id == "") {
+ $link_id = strtolower($alt_text); # for shortcut links like ![this][].
+ }
+
+ $alt_text = $this->encodeAttribute($alt_text);
+ if (isset($this->urls[$link_id])) {
+ $url = $this->encodeAttribute($this->urls[$link_id]);
+ $result = "titles[$link_id])) {
+ $title = $this->titles[$link_id];
+ $title = $this->encodeAttribute($title);
+ $result .= " title=\"$title\"";
+ }
+ $result .= $this->suffix;
+ $result = $this->hash_part($result);
+ }
+ else {
+ # If there's no such link ID, leave intact:
+ $result = $whole_match;
+ }
+
+ return $result;
+ }
+
+ protected function _doImages_inline_callback($matches)
+ {
+ $whole_match = $matches[1];
+ $alt_text = $matches[2];
+ $url = $matches[3] == '' ? $matches[4] : $matches[3];
+ $title =& $matches[7];
+
+ $alt_text = $this->encodeAttribute($alt_text);
+ $url = $this->encodeAttribute($url);
+ $result = "encodeAttribute($title);
+ $result .= " title=\"$title\""; # $title already quoted
+ }
+ $result .= $this->suffix;
+
+ return $this->hash_part($result);
+ }
+
+ protected function doHeaders($text)
+ {
+ # Setext-style headers:
+ # Header 1
+ # ========
+ #
+ # Header 2
+ # --------
+ #
+ $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
+ array(&$this, '_doHeaders_callback_setext'), $text);
+
+ # atx-style headers:
+ # # Header 1
+ # ## Header 2
+ # ## Header 2 with closing hashes ##
+ # ...
+ # ###### Header 6
+ #
+ $text = preg_replace_callback('{
+ ^(\#{1,6}) # $1 = string of #\'s
+ [ ]*
+ (.+?) # $2 = Header text
+ [ ]*
+ \#* # optional closing #\'s (not counted)
+ \n+
+ }xm',
+ array(&$this, '_doHeaders_callback_atx'), $text);
+
+ return $text;
+ }
+
+ protected function _doHeaders_callback_setext($matches)
+ {
+ # Terrible hack to check we haven't found an empty list item.
+ if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
+ return $matches[0];
+
+ $level = $matches[2]{0} == '=' ? 1 : 2;
+ $block = "".$this->runSpanGamut($matches[1])."";
+ return "\n" . $this->hash_block($block) . "\n\n";
+ }
+
+ protected function _doHeaders_callback_atx($matches)
+ {
+ $level = strlen($matches[1]);
+ $block = "".$this->runSpanGamut($matches[2])."";
+ return "\n" . $this->hash_block($block) . "\n\n";
+ }
+
+ protected function doLists($text)
+ {
+ #
+ # Form HTML ordered (numbered) and unordered (bulleted) lists.
+ #
+ $less_than_tab = $this->tab_width - 1;
+
+ # Re-usable patterns to match list item bullets and number markers:
+ $marker_ul_re = '[*+-]';
+ $marker_ol_re = '\d+[.]';
+ $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
+
+ $markers_relist = array(
+ $marker_ul_re => $marker_ol_re,
+ $marker_ol_re => $marker_ul_re,
+ );
+
+ foreach ($markers_relist as $marker_re => $other_marker_re) {
+ # Re-usable pattern to match any entirel ul or ol list:
+ $whole_list_re = '
+ ( # $1 = whole list
+ ( # $2
+ ([ ]{0,'.$less_than_tab.'}) # $3 = number of spaces
+ ('.$marker_re.') # $4 = first list item marker
+ [ ]+
+ )
+ (?s:.+?)
+ ( # $5
+ \z
+ |
+ \n{2,}
+ (?=\S)
+ (?! # Negative lookahead for another list item marker
+ [ ]*
+ '.$marker_re.'[ ]+
+ )
+ |
+ (?= # Lookahead for another kind of list
+ \n
+ \3 # Must have the same indentation
+ '.$other_marker_re.'[ ]+
+ )
+ )
+ )
+ '; // mx
+
+ # We use a different prefix before nested lists than top-level lists.
+ # See extended comment in _ProcessListItems().
+
+ if ($this->list_level) {
+ $text = preg_replace_callback('{
+ ^
+ '.$whole_list_re.'
+ }mx',
+ array(&$this, '_doLists_callback'), $text);
+ }
+ else {
+ $text = preg_replace_callback('{
+ (?:(?<=\n)\n|\A\n?) # Must eat the newline
+ '.$whole_list_re.'
+ }mx',
+ array(&$this, '_doLists_callback'), $text);
+ }
+ }
+
+ return $text;
+ }
+
+ protected function _doLists_callback($matches)
+ {
+ # Re-usable patterns to match list item bullets and number markers:
+ $marker_ul_re = '[*+-]';
+ $marker_ol_re = '\d+[.]';
+ $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
+
+ $list = $matches[1];
+ $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";
+
+ $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );
+
+ $list .= "\n";
+ $result = $this->processListItems($list, $marker_any_re);
+
+ $result = $this->hash_block("<$list_type>\n" . $result . "$list_type>");
+ return "\n". $result ."\n\n";
+ }
+
+ var $list_level = 0;
+
+ protected function processListItems($list_str, $marker_any_re)
+ {
+ #
+ # Process the contents of a single ordered or unordered list, splitting it
+ # into individual list items.
+ #
+ # The $this->list_level global keeps track of when we're inside a list.
+ # Each time we enter a list, we increment it; when we leave a list,
+ # we decrement. If it's zero, we're not in a list anymore.
+ #
+ # We do this because when we're not inside a list, we want to treat
+ # something like this:
+ #
+ # I recommend upgrading to version
+ # 8. Oops, now this line is treated
+ # as a sub-list.
+ #
+ # As a single paragraph, despite the fact that the second line starts
+ # with a digit-period-space sequence.
+ #
+ # Whereas when we're inside a list (or sub-list), that line will be
+ # treated as the start of a sub-list. What a kludge, huh? This is
+ # an aspect of Markdown's syntax that's hard to parse perfectly
+ # without resorting to mind-reading. Perhaps the solution is to
+ # change the syntax rules such that sub-lists must start with a
+ # starting cardinal number; e.g. "1." or "a.".
+
+ $this->list_level++;
+
+ # trim trailing blank lines:
+ $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
+
+ $list_str = preg_replace_callback('{
+ (\n)? # leading line = $1
+ (^[ ]*) # leading whitespace = $2
+ ('.$marker_any_re.' # list marker and space = $3
+ (?:[ ]+|(?=\n)) # space only required if item is not empty
+ )
+ ((?s:.*?)) # list item text = $4
+ (?:(\n+(?=\n))|\n) # tailing blank line = $5
+ (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
+ }xm',
+ array(&$this, '_processListItems_callback'), $list_str);
+
+ $this->list_level--;
+ return $list_str;
+ }
+
+ protected function _processListItems_callback($matches)
+ {
+ $item = $matches[4];
+ $leading_line =& $matches[1];
+ $leading_space =& $matches[2];
+ $marker_space = $matches[3];
+ $tailing_blank_line =& $matches[5];
+
+ if ($leading_line || $tailing_blank_line ||
+ preg_match('/\n{2,}/', $item))
+ {
+ # Replace marker with the appropriate whitespace indentation
+ $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
+ $item = $this->run_block_gamut($this->outdent($item)."\n");
+ }
+ else {
+ # Recursion for sub-lists:
+ $item = $this->doLists($this->outdent($item));
+ $item = preg_replace('/\n+$/', '', $item);
+ $item = $this->runSpanGamut($item);
+ }
+
+ return "" . $item . "\n";
+ }
+
+ protected function doCodeBlocks($text)
+ {
+ #
+ # Process Markdown `` blocks.
+ #
+ $text = preg_replace_callback('{
+ (?:\n\n|\A\n?)
+ ( # $1 = the code block -- one or more lines, starting with a space/tab
+ (?>
+ [ ]{'.$this->tab_width.'} # Lines must start with a tab or a tab-width of spaces
+ .*\n+
+ )+
+ )
+ ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
+ }xm',
+ array(&$this, '_doCodeBlocks_callback'), $text);
+
+ return $text;
+ }
+
+ protected function _doCodeBlocks_callback($matches)
+ {
+ $codeblock = $matches[1];
+
+ $codeblock = $this->outdent($codeblock);
+ $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
+
+ # trim leading newlines and trailing newlines
+ $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
+
+ $codeblock = "$codeblock\n
";
+ return "\n\n".$this->hash_block($codeblock)."\n\n";
+ }
+
+ protected function makeCodeSpan($code)
+ {
+ #
+ # Create a code span markup for $code. Called from handleSpanToken.
+ #
+ $code = htmlspecialchars(trim($code), ENT_NOQUOTES);
+ return $this->hash_part("$code
");
+ }
+
+ protected function prepare_italics_and_bold()
+ {
+ #
+ # Prepare regular expressions for searching emphasis tokens in any
+ # context.
+ #
+ foreach ($this->em_relist as $em => $em_re) {
+ foreach ($this->strong_relist as $strong => $strong_re) {
+ # Construct list of allowed token expressions.
+ $token_relist = array();
+ if (isset($this->em_strong_relist["$em$strong"])) {
+ $token_relist[] = $this->em_strong_relist["$em$strong"];
+ }
+ $token_relist[] = $em_re;
+ $token_relist[] = $strong_re;
+
+ # Construct master expression from list.
+ $token_re = '{('. implode('|', $token_relist) .')}';
+ $this->em_strong_prepared_relist["$em$strong"] = $token_re;
+ }
+ }
+ }
+
+ protected function doItalicsAndBold($text)
+ {
+ $token_stack = array('');
+ $text_stack = array('');
+ $em = '';
+ $strong = '';
+ $tree_char_em = false;
+
+ while (1) {
+ #
+ # Get prepared regular expression for seraching emphasis tokens
+ # in current context.
+ #
+ $token_re = $this->em_strong_prepared_relist["$em$strong"];
+
+ #
+ # Each loop iteration search for the next emphasis token.
+ # Each token is then passed to handleSpanToken.
+ #
+ $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
+ $text_stack[0] .= $parts[0];
+ $token =& $parts[1];
+ $text =& $parts[2];
+
+ if (empty($token)) {
+ # Reached end of text span: empty stack without emitting.
+ # any more emphasis.
+ while ($token_stack[0]) {
+ $text_stack[1] .= array_shift($token_stack);
+ $text_stack[0] .= array_shift($text_stack);
+ }
+ break;
+ }
+
+ $token_len = strlen($token);
+ if ($tree_char_em) {
+ # Reached closing marker while inside a three-char emphasis.
+ if ($token_len == 3) {
+ # Three-char closing marker, close em and strong.
+ array_shift($token_stack);
+ $span = array_shift($text_stack);
+ $span = $this->runSpanGamut($span);
+ $span = "$span";
+ $text_stack[0] .= $this->hash_part($span);
+ $em = '';
+ $strong = '';
+ } else {
+ # Other closing marker: close one em or strong and
+ # change current token state to match the other
+ $token_stack[0] = str_repeat($token{0}, 3-$token_len);
+ $tag = $token_len == 2 ? "strong" : "em";
+ $span = $text_stack[0];
+ $span = $this->runSpanGamut($span);
+ $span = "<$tag>$span$tag>";
+ $text_stack[0] = $this->hash_part($span);
+ $$tag = ''; # $$tag stands for $em or $strong
+ }
+ $tree_char_em = false;
+ } else if ($token_len == 3) {
+ if ($em) {
+ # Reached closing marker for both em and strong.
+ # Closing strong marker:
+ for ($i = 0; $i < 2; ++$i) {
+ $shifted_token = array_shift($token_stack);
+ $tag = strlen($shifted_token) == 2 ? "strong" : "em";
+ $span = array_shift($text_stack);
+ $span = $this->runSpanGamut($span);
+ $span = "<$tag>$span$tag>";
+ $text_stack[0] .= $this->hash_part($span);
+ $$tag = ''; # $$tag stands for $em or $strong
+ }
+ } else {
+ # Reached opening three-char emphasis marker. Push on token
+ # stack; will be handled by the special condition above.
+ $em = $token{0};
+ $strong = "$em$em";
+ array_unshift($token_stack, $token);
+ array_unshift($text_stack, '');
+ $tree_char_em = true;
+ }
+ } else if ($token_len == 2) {
+ if ($strong) {
+ # Unwind any dangling emphasis marker:
+ if (strlen($token_stack[0]) == 1) {
+ $text_stack[1] .= array_shift($token_stack);
+ $text_stack[0] .= array_shift($text_stack);
+ }
+ # Closing strong marker:
+ array_shift($token_stack);
+ $span = array_shift($text_stack);
+ $span = $this->runSpanGamut($span);
+ $span = "$span";
+ $text_stack[0] .= $this->hash_part($span);
+ $strong = '';
+ } else {
+ array_unshift($token_stack, $token);
+ array_unshift($text_stack, '');
+ $strong = $token;
+ }
+ } else {
+ # Here $token_len == 1
+ if ($em) {
+ if (strlen($token_stack[0]) == 1) {
+ # Closing emphasis marker:
+ array_shift($token_stack);
+ $span = array_shift($text_stack);
+ $span = $this->runSpanGamut($span);
+ $span = "$span";
+ $text_stack[0] .= $this->hash_part($span);
+ $em = '';
+ } else {
+ $text_stack[0] .= $token;
+ }
+ } else {
+ array_unshift($token_stack, $token);
+ array_unshift($text_stack, '');
+ $em = $token;
+ }
+ }
+ }
+ return $text_stack[0];
+ }
+
+ protected function doBlockQuotes($text)
+ {
+ $text = preg_replace_callback('/
+ ( # Wrap whole match in $1
+ (?>
+ ^[ ]*>[ ]? # ">" at the start of a line
+ .+\n # rest of the first line
+ (.+\n)* # subsequent consecutive lines
+ \n* # blanks
+ )+
+ )
+ /xm',
+ array(&$this, '_doBlockQuotes_callback'), $text);
+
+ return $text;
+ }
+
+ protected function _doBlockQuotes_callback($matches)
+ {
+ $bq = $matches[1];
+ # trim one level of quoting - trim whitespace-only lines
+ $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
+ $bq = $this->run_block_gamut($bq); # recurse
+
+ $bq = preg_replace('/^/m', " ", $bq);
+ # These leading spaces cause problem with content,
+ # so we need to fix that:
+ $bq = preg_replace_callback('{(\s*.+?
)}sx',
+ array(&$this, '_doBlockQuotes_callback2'), $bq);
+
+ return "\n". $this->hash_block("\n$bq\n
")."\n\n";
+ }
+
+ protected function _doBlockQuotes_callback2($matches)
+ {
+ $pre = $matches[1];
+ $pre = preg_replace('/^ /m', '', $pre);
+ return $pre;
+ }
+
+ protected function formParagraphs($text)
+ {
+ #
+ # Params:
+ # $text - string to process with html tags
+ #
+ # Strip leading and trailing lines:
+ $text = preg_replace('/\A\n+|\n+\z/', '', $text);
+
+ $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
+
+ #
+ # Wrap
tags and unhashify HTML blocks
+ #
+ foreach ($grafs as $key => $value) {
+ if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
+ # Is a paragraph.
+ $value = $this->runSpanGamut($value);
+ $value = preg_replace('/^([ ]*)/', "
", $value);
+ $value .= "
";
+ $grafs[$key] = $this->unhash($value);
+ }
+ else {
+ # Is a block.
+ # Modify elements of @grafs in-place...
+ $graf = $value;
+ $block = $this->html_hashes[$graf];
+ $graf = $block;
+ $grafs[$key] = $graf;
+ }
+ }
+
+ return implode("\n\n", $grafs);
+ }
+
+ protected function encodeAttribute($text)
+ {
+ #
+ # Encode text for a double-quoted HTML attribute. This function
+ # is *not* suitable for attributes enclosed in single quotes.
+ #
+ $text = $this->encodeAmpsAndAngles($text);
+ $text = str_replace('"', '"', $text);
+ return $text;
+ }
+
+ protected function encodeAmpsAndAngles($text)
+ {
+ #
+ # Smart processing for ampersands and angle brackets that need to
+ # be encoded. Valid character entities are left alone unless the
+ # no-entities mode is set.
+ #
+ if ($this->no_entities) {
+ $text = str_replace('&', '&', $text);
+ } else {
+ # Ampersand-encoding based entirely on Nat Irons's Amputator
+ # MT plugin:
+ $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
+ '&', $text);;
+ }
+ # Encode remaining <'s
+ $text = str_replace('<', '<', $text);
+
+ return $text;
+ }
+
+ protected function doAutoLinks($text)
+ {
+ $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i',
+ array(&$this, '_doAutoLinks_url_callback'), $text);
+
+ # Email addresses:
+ $text = preg_replace_callback('{
+ <
+ (?:mailto:)?
+ (
+ (?:
+ [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
+ |
+ ".*?"
+ )
+ \@
+ (?:
+ [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
+ |
+ \[[\d.a-fA-F:]+\] # IPv4 & IPv6
+ )
+ )
+ >
+ }xi',
+ array(&$this, '_doAutoLinks_email_callback'), $text);
+
+ return $text;
+ }
+
+ protected function _doAutoLinks_url_callback($matches)
+ {
+ $url = $this->encodeAttribute($matches[1]);
+ $link = "$url";
+ return $this->hash_part($link);
+ }
+
+ protected function _doAutoLinks_email_callback($matches)
+ {
+ $address = $matches[1];
+ $link = $this->encodeEmailAddress($address);
+ return $this->hash_part($link);
+ }
+
+ protected function encodeEmailAddress($addr)
+ {
+ #
+ # Input: an email address, e.g. "foo@example.com"
+ #
+ # Output: the email address as a mailto link, with each character
+ # of the address encoded as either a decimal or hex entity, in
+ # the hopes of foiling most address harvesting spam bots. E.g.:
+ #
+ # foo@exampl
+ # e.com
+ #
+ # Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
+ # With some optimizations by Milian Wolff.
+ #
+ $addr = "mailto:" . $addr;
+ $chars = preg_split('/(? $char) {
+ $ord = ord($char);
+ # Ignore non-ascii chars.
+ if ($ord < 128) {
+ $r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
+ # roughly 10% raw, 45% hex, 45% dec
+ # '@' *must* be encoded. I insist.
+ if ($r > 90 && $char != '@') /* do nothing */;
+ else if ($r < 45) $chars[$key] = ''.dechex($ord).';';
+ else $chars[$key] = ''.$ord.';';
+ }
+ }
+
+ $addr = implode('', $chars);
+ $text = implode('', array_slice($chars, 7)); # text without `mailto:`
+ $addr = "$text";
+
+ return $addr;
+ }
+
+ protected function parseSpan($str)
+ {
+ #
+ # Take the string $str and parse it into tokens, hashing embeded HTML,
+ # escaped characters and handling code spans.
+ #
+ $output = '';
+
+ $span_re = '{
+ (
+ \\\\'.$this->escape_chars_re.'
+ |
+ (?no_markup ? '' : '
+ |
+ # comment
+ |
+ <\?.*?\?> | <%.*?%> # processing instruction
+ |
+ <[/!$]?[-a-zA-Z0-9:_]+ # regular tags
+ (?>
+ \s
+ (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
+ )?
+ >
+ ').'
+ )
+ }xs';
+
+ while (1) {
+ #
+ # Each loop iteration seach for either the next tag, the next
+ # openning code span marker, or the next escaped character.
+ # Each token is then passed to handleSpanToken.
+ #
+ $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);
+
+ # Create token from text preceding tag.
+ if ($parts[0] != "") {
+ $output .= $parts[0];
+ }
+
+ # Check if we reach the end.
+ if (isset($parts[1])) {
+ $output .= $this->handleSpanToken($parts[1], $parts[2]);
+ $str = $parts[2];
+ }
+ else {
+ break;
+ }
+ }
+
+ return $output;
+ }
+
+ protected function handleSpanToken($token, &$str)
+ {
+ #
+ # Handle $token provided by parseSpan by determining its nature and
+ # returning the corresponding value that should replace it.
+ #
+ switch ($token{0}) {
+ case "\\":
+ return $this->hash_part("". ord($token{1}). ";");
+ case "`":
+ # Search for end marker in remaining text.
+ if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
+ $str, $matches))
+ {
+ $str = $matches[2];
+ $codespan = $this->makeCodeSpan($matches[1]);
+ return $this->hash_part($codespan);
+ }
+ return $token; // return as text since no ending marker found.
+ default:
+ return $this->hash_part($token);
+ }
+ }
+
+ protected function outdent($text)
+ {
+ #
+ # Remove one level of line-leading tabs or spaces
+ #
+ return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text);
+ }
+
+ protected function detab($text)
+ {
+ #
+ # Replace tabs with the appropriate amount of space.
+ #
+ # For each line we separate the line in blocks delemited by
+ # tab characters. Then we reconstruct every line by adding the
+ # appropriate number of space between each blocks.
+
+ $text = preg_replace_callback('/^.*\t.*$/m',
+ array(&$this, '_detab_callback'), $text);
+
+ return $text;
+ }
+
+ protected function _detab_callback($matches)
+ {
+ $line = $matches[0];
+ $strlen = $this->utf8_strlen; # strlen function for UTF-8.
+
+ # Split in blocks.
+ $blocks = explode("\t", $line);
+ # Add each blocks to the line.
+ $line = $blocks[0];
+ unset($blocks[0]); # Do not add first block twice.
+ foreach ($blocks as $block) {
+ # Calculate amount of space, insert spaces, insert block.
+ $amount = $this->tab_width -
+ $strlen($line, 'UTF-8') % $this->tab_width;
+ $line .= str_repeat(" ", $amount) . $block;
+ }
+ return $line;
+ }
+
+ protected function init_detab()
+ {
+ #
+ # Check for the availability of the function in the `utf8_strlen` property
+ # (initially `mb_strlen`). If the function is not available, create a
+ # function that will loosely count the number of UTF-8 characters with a
+ # regular expression.
+ #
+ if (function_exists($this->utf8_strlen)) return;
+ $this->utf8_strlen = create_function('$text', 'return preg_match_all(
+ "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/",
+ $text, $m);');
+ }
+
+ protected function unhash($text)
+ {
+ #
+ # Swap back in all the tags hashed by _Hash_HTML_blocks.
+ #
+ return preg_replace_callback('/(.)\x1A[0-9]+\1/',
+ array(&$this, '_unhash_callback'), $text);
+ }
+
+ protected function _unhash_callback($matches)
+ {
+ return $this->html_hashes[$matches[0]];
+ }
+}
+?>
diff --git a/classes/markdown.php b/classes/markdown.php
new file mode 100644
index 0000000..b3139a7
--- /dev/null
+++ b/classes/markdown.php
@@ -0,0 +1,15 @@
+
+* @package Markdown
+* @copyright (c) 2010 Unmagnify team
+* @license http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt
+* @version $id$
+* @link http://www.stroppytux.net/projects/kohana-markdown/
+* @since Available since Release 1.0
+* *
+*/
+abstract class Markdown extends Kohana_Markdown {}
+?>
diff --git a/config/markdown.php b/config/markdown.php
new file mode 100644
index 0000000..e800f21
--- /dev/null
+++ b/config/markdown.php
@@ -0,0 +1,11 @@
+ array
+ (
+ 'type' => 'xhtml', // html or xhtml
+ 'tab_width' => 4, // Tab width for output
+ // Many more options to come
+ )
+);
+?>