Skip to content

Commit

Permalink
simplified parser
Browse files Browse the repository at this point in the history
  • Loading branch information
cebe committed May 24, 2014
1 parent 56552bb commit 0eac7ba
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 111 deletions.
20 changes: 9 additions & 11 deletions GithubMarkdown.php
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,6 @@ protected function inlineMarkers()
'ftp' => 'parseUrl',
'~~' => 'parseStrike',
];

if ($this->enableNewlines) {
$markers["\n"] = 'parseDirectNewline';
}

return array_merge(parent::inlineMarkers(), $markers);
}

Expand Down Expand Up @@ -122,13 +117,16 @@ protected function parseUrl($markdown)
}

/**
* Parses a newline indicated by a direct line break. This is only used when `enableNewlines` is true.
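* @inheritdoc
*
* Converts line breaks in plain text sections to HTML line breaks. When `enableNewlines` is true,
* every newline is converted; otherwise the handling is delegated to the parent implementation.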
*/
protected function parseDirectNewline($markdown)
protected function parsePlainText($text)
{
return [
$this->html5 ? "<br>\n" : "<br />\n",
1
];
if ($this->enableNewlines) {
return preg_replace("/( \n|\n)/", $this->html5 ? "<br>\n" : "<br />\n", $text);
} else {
return parent::parsePlainText($text);
}
}
}
10 changes: 4 additions & 6 deletions Markdown.php
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ class Markdown extends Parser
protected function inlineMarkers()
{
return [
" \n" => 'parseNewline',
'&' => 'parseEntity',
'![' => 'parseImage',
'*' => 'parseEmphStrong',
Expand Down Expand Up @@ -559,14 +558,13 @@ protected function renderHr($block)


/**
* @inheritdoc
*
* Converts a hard line break, indicated by two spaces at the end of a markdown line, into an HTML line break.
*/
protected function parseNewline($text)
protected function parsePlainText($text)
{
return [
$this->html5 ? "<br>\n" : "<br />\n",
3
];
return str_replace(" \n", $this->html5 ? "<br>\n" : "<br />\n", $text);
}

/**
Expand Down
109 changes: 15 additions & 94 deletions Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@ class Parser

private $_inlineMarkers = [];

private $_whitespaceInlineMarkers = [];

/**
* Parses the given text considering the full language.
*
Expand Down Expand Up @@ -68,27 +66,16 @@ public function parseParagraph($text)
private function prepareMarkers($text)
{
$this->_inlineMarkers = [];
$this->_whitespaceInlineMarkers = [];
// add all markers that are present in markdown
// check is done to avoid iterations in parseInline(), good for huge markdown files
foreach ($this->inlineMarkers() as $marker => $method) {
if (strpos($text, $marker) !== false) {
$m = substr($marker, 0, 1);
// markers beginning with whitespace are handled differently
if ($m !== ' ') {
// put the longest marker first
if (isset($this->_inlineMarkers[$m]) && strlen($marker) > strlen(reset($this->_inlineMarkers[$m]))) {
$this->_inlineMarkers[$m] = array_merge([$marker => $method], $this->_inlineMarkers[$m]);
break;
}
$this->_inlineMarkers[$m][$marker] = $method;
$m = $marker[0];
// put the longest marker first
if (isset($this->_inlineMarkers[$m]) && strlen($marker) > strlen(reset($this->_inlineMarkers[$m]))) {
$this->_inlineMarkers[$m] = array_merge([$marker => $method], $this->_inlineMarkers[$m]);
} else {
// put the longest marker first
if (!empty($this->_whitespaceInlineMarkers) && strlen($marker) > strlen(reset($this->_whitespaceInlineMarkers))) {
$this->_whitespaceInlineMarkers = array_merge([$marker => $method], $this->_whitespaceInlineMarkers);
break;
}
$this->_whitespaceInlineMarkers[$marker] = $method;
$this->_inlineMarkers[$m][$marker] = $method;
}
}
}
Expand Down Expand Up @@ -209,6 +196,9 @@ protected function renderParagraph($block)
* When a marker is found in the text, the handler method is called with the text
* starting at the position of the marker.
*
* Note that markers starting with whitespace may slow down the parser;
* you may want to use [[parsePlainText]] to deal with them instead.
*
* @return array a map of markers to parser methods
*/
protected function inlineMarkers()
Expand All @@ -224,12 +214,6 @@ protected function inlineMarkers()
*/
protected function parseInline($text)
{
// markers beginning with a whitespace are handled differently
// because of too many false-positive matches of strpbrk
if (!empty($this->_whitespaceInlineMarkers) && $this->matchNearestWhitespaceMarker($text) !== false) {
return $this->parseInlineWithWhitespace($text);
}

$markers = implode('', array_keys($this->_inlineMarkers));

$paragraph = '';
Expand All @@ -240,7 +224,7 @@ protected function parseInline($text)

// add the text up to next marker to the paragraph
if ($pos !== 0) {
$paragraph .= substr($text, 0, $pos);
$paragraph .= $this->parsePlainText(substr($text, 0, $pos));
}
$text = $found;

Expand All @@ -261,81 +245,18 @@ protected function parseInline($text)
}
}

$paragraph .= $text;
$paragraph .= $this->parsePlainText($text);

return $paragraph;
}

/**
* Parses inline elements of the language.
*
* @param $text
* @return string
* This function gets called for each plain text section in the markdown text.
* It can be used to process normal text sections, for example to highlight keywords or
* to do special escaping.
*/
private function parseInlineWithWhitespace($text)
{
$markers = implode('', array_keys($this->_inlineMarkers));

$paragraph = '';

while (true) {
if (!empty($markers)) {
$found = strpbrk($text, $markers);
} else {
$found = false;
}
$wpos = $this->matchNearestWhitespaceMarker($text);

if ($found === false && $wpos === false) {
break;
}
// switch between found whitespace or marker
if ($found !== false) {
$pos = strpos($text, $found);
$matchedMarkers = $this->_inlineMarkers[$found[0]];
}
if ($wpos !== false && ($found === false || $wpos < $pos)) {
$pos = $wpos;
$found = substr($text, $wpos);
$matchedMarkers = $this->_whitespaceInlineMarkers;
}

// add the text up to next marker to the paragraph
if ($pos !== 0) {
$paragraph .= substr($text, 0, $pos);
}
$text = $found;

$parsed = false;
foreach ($matchedMarkers as $marker => $method) {
if (strncmp($text, $marker, strlen($marker)) === 0) {
// parse the marker
list($output, $offset) = $this->$method($text);
$paragraph .= $output;
$text = substr($text, $offset);
$parsed = true;
break;
}
}
if (!$parsed) {
$paragraph .= substr($text, 0, 1);
$text = substr($text, 1);
}
}

$paragraph .= $text;

return $paragraph;
}

private function matchNearestWhitespaceMarker($text)
protected function parsePlainText($text)
{
$pos = false;
foreach ($this->_whitespaceInlineMarkers as $marker => $method) {
if (($wpos = strpos($text, $marker)) !== false && ($pos === false || $pos > $wpos)) {
$pos = $wpos;
}
}
return $pos;
return $text;
}
}
2 changes: 2 additions & 0 deletions tests/GithubMarkdownTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,7 @@ public function testNewlines()
$this->assertEquals("This is text<br />\nnewline\nnewline.", $markdown->parseParagraph("This is text \nnewline\nnewline."));
$markdown->enableNewlines = true;
$this->assertEquals("This is text<br />\nnewline<br />\nnewline.", $markdown->parseParagraph("This is text \nnewline\nnewline."));

$this->assertEquals("<p>This is text</p>\n<p>newline<br />\nnewline.</p>\n", $markdown->parse("This is text\n\nnewline\nnewline."));
}
}

0 comments on commit 0eac7ba

Please sign in to comment.