Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Merge pull request #1413 from elinw/truncatecomplex2

Add a more complex truncate method
  • Loading branch information...
commit b3cc2f5b7d6b76d2bc562972bf6c8e3b5326c616 2 parents 49f4a29 + a65a97d
@pasamio pasamio authored
View
159 libraries/joomla/html/string.php
@@ -35,6 +35,11 @@
*/
public static function truncate($text, $length = 0, $noSplit = true, $allowHtml = true)
{
+ // Assume a lone open tag is invalid HTML.
+ if ($length == 1 && substr($text, 0, 1) == '<')
+ {
+ return '...';
+ }
// Check if HTML tags are allowed.
if (!$allowHtml)
{
@@ -52,25 +57,32 @@ public static function truncate($text, $length = 0, $noSplit = true, $allowHtml
$text = JString::trim(preg_replace('#\s+#mui', ' ', $text));
}
- // Truncate the item text if it is too long.
+ // Whether or not allowing HTML, truncate the item text if it is too long.
if ($length > 0 && JString::strlen($text) > $length)
{
- // Find the first space within the allowed length.
- $tmp = JString::substr($text, 0, $length);
+ $tmp = trim(JString::substr($text, 0, $length));
+ if (substr($tmp, 0, 1) == '<' && strpos($tmp, '>') === false)
+ {
+ return '...';
+ }
+ // $noSplit true means that we do not allow splitting of words.
if ($noSplit)
{
+ // Find the position of the last space within the allowed length.
$offset = JString::strrpos($tmp, ' ');
- if (JString::strrpos($tmp, '<') > JString::strrpos($tmp, '>'))
+ $tmp = JString::substr($tmp, 0, $offset + 1);
+
+ // If there are no spaces and the string is longer than the maximum
+ // we need to just use the ellipsis. In that case we are done.
+ if ($offset === false && strlen($text) > $length)
{
- $offset = JString::strrpos($tmp, '<');
+ return '...';
}
- $tmp = JString::substr($tmp, 0, $offset);
- // If we don't have 3 characters of room, go to the second space within the limit.
if (JString::strlen($tmp) > $length - 3)
{
- $tmp = JString::substr($tmp, 0, JString::strrpos($tmp, ' '));
+ $tmp = trim(JString::substr($tmp, 0, JString::strrpos($tmp, ' ')));
}
}
@@ -79,6 +91,8 @@ public static function truncate($text, $length = 0, $noSplit = true, $allowHtml
// Put all opened tags into an array
preg_match_all("#<([a-z][a-z0-9]*)\b.*?(?!/)>#i", $tmp, $result);
$openedTags = $result[1];
+
+ // Some tags self close so they do not need a separate close tag.
$openedTags = array_diff($openedTags, array("img", "hr", "br"));
$openedTags = array_values($openedTags);
@@ -88,12 +102,13 @@ public static function truncate($text, $length = 0, $noSplit = true, $allowHtml
$numOpened = count($openedTags);
- // All tags are closed
+ // All tags are closed so trim the text and finish.
if (count($closedTags) == $numOpened)
{
- return $tmp . '...';
+ return trim($tmp) . '...';
}
+ // Closing tags need to be in the reverse order of opening tags.
$openedTags = array_reverse($openedTags);
// Close tags
@@ -109,14 +124,134 @@ public static function truncate($text, $length = 0, $noSplit = true, $allowHtml
}
}
}
-
- $text = $tmp . '...';
+ if ( $tmp === false || strlen($text) > strlen($tmp))
+ {
+ $text = trim($tmp) . '...';
+ }
}
+ // Clean up any internal spaces created by the processing.
+ $text = str_replace(' </', '</', $text);
+ $text = str_replace(' ...', '...', $text);
+
return $text;
}
/**
+ * Method to extend the truncate method to more complex situations
+ *
+ * The goal is to get the proper length plain text string with as much of
+ * the html intact as possible with all tags properly closed.
+ *
+ * @param string $html The content of the introtext to be truncated
+ * @param integer $maxLength The maximum number of characters to render; 0 or less disables truncation
+ * @param boolean $noSplit Don't split a word if that is where the cutoff occurs (default: true).
+ *
+ * @return string The truncated string. If the string is truncated an ellipsis
+ * (...) will be appended. The input is returned unchanged when it already
+ * fits, when truncate finds no text in it, or when no shorter cut is found.
+ *
+ * @note: If a maximum length of 3 or less is selected and the text has more than
+ * that number of characters an ellipsis will be displayed.
+ * This method will not create valid HTML from malformed HTML.
+ *
+ * @since 12.2
+ */
+ public static function truncateComplex($html, $maxLength = 0, $noSplit = true)
+ {
+     // Lengths in this method are byte counts (strlen).
+     // NOTE(review): truncate() measures with JString::strlen; confirm whether multibyte text needs the same here.
+     $baseLength = strlen($html);
+
+     // A non-positive limit means "no limit", and markup that already fits is returned untouched.
+     if ($maxLength <= 0 || $baseLength <= $maxLength)
+     {
+         return $html;
+     }
+
+     // Very short limits on text that does not reach a tag inside the limit collapse to the ellipsis.
+     if ($maxLength <= 3 && substr($html, 0, 1) !== '<' && strpos(substr($html, 0, $maxLength - 1), '<') === false)
+     {
+         return '...';
+     }
+
+     // A limit of 1 on markup that starts with a tag: keep only that first element, emptied.
+     if ($maxLength == 1 && substr($html, 0, 1) === '<')
+     {
+         // $match[0] is the whole opening tag, $match[1] only the element name, so
+         // attributes can never leak into the closing tag.
+         if (!preg_match('#^<([a-z][^\s/>]*)[^>]*>#i', $html, $match))
+         {
+             // Not a well-formed opening tag; same answer truncate gives for a lone "<".
+             return '...';
+         }
+
+         // Self-closed tags and the tags truncate treats as self closing take no closing tag.
+         if (substr($match[0], -2) === '/>' || in_array(strtolower($match[1]), array('img', 'hr', 'br'), true))
+         {
+             return $match[0] . '...';
+         }
+
+         return $match[0] . '</' . $match[1] . '>...';
+     }
+
+     // Reference result: the truncated plain text, i.e. the rendered text we want to end up with.
+     $ptString = JHtml::_('string.truncate', $html, $maxLength, $noSplit, false);
+
+     // It's all HTML, just return it.
+     if (strlen($ptString) === 0)
+     {
+         return $html;
+     }
+
+     // Without a trailing ellipsis the plain text was not shortened, so the whole string is used.
+     if (substr($ptString, -3) !== '...')
+     {
+         return $html;
+     }
+
+     // Regular truncate gave only the ellipsis; retry on the tag-free text cut to the limit.
+     if ($ptString === '...')
+     {
+         $stripped = substr(strip_tags($html), 0, $maxLength);
+         $ptString = JHtml::_('string.truncate', $stripped, $maxLength, $noSplit, false);
+     }
+
+     // Trim the ellipsis that truncate adds.
+     $ptString = rtrim($ptString, '.');
+
+     // Widen the cut through the markup until its plain text matches the reference text.
+     while ($maxLength <= $baseLength)
+     {
+         // Get the truncated string assuming HTML is allowed.
+         $htmlString = JHtml::_('string.truncate', $html, $maxLength, $noSplit, true);
+
+         if ($htmlString === '...' && strlen($ptString) + 3 > $maxLength)
+         {
+             return $htmlString;
+         }
+
+         $htmlString = rtrim($htmlString, '.');
+
+         // Now get the plain text from the HTML string and trim it.
+         $htmlStringToPtString = JHtml::_('string.truncate', $htmlString, $maxLength, $noSplit, false);
+         $htmlStringToPtString = rtrim($htmlStringToPtString, '.');
+
+         // Strict comparison: both sides are strings and must match exactly.
+         if ($ptString === $htmlStringToPtString)
+         {
+             return $htmlString . '...';
+         }
+
+         // How much shorter the recovered plain text is than the reference text.
+         $diffLength = strlen($ptString) - strlen($htmlStringToPtString);
+
+         if ($diffLength <= 0)
+         {
+             return $htmlString . '...';
+         }
+
+         // Set new $maxLength that adjusts for the HTML tags.
+         $maxLength += $diffLength;
+     }
+
+     // The adjusted limit passed the end of the markup without a match; return the
+     // markup as given rather than falling off the end of the method with null.
+     return $html;
+ }
+
+ /**
* Abridges text strings over the specified character limit. The
* behavior will insert an ellipsis into the text replacing a section
* of variable size to ensure the string does not exceed the defined
View
250 tests/suites/unit/joomla/html/html/JHtmlStringTest.php
@@ -75,14 +75,14 @@ function getTestTruncateData()
),
'Plain text over the limit by two words' => array(
'Plain text test',
- 12,
+ 7,
true,
true,
'Plain...',
),
'Plain text over the limit by one word' => array(
'Plain text test',
- 13,
+ 14,
true,
true,
'Plain text...',
@@ -97,7 +97,7 @@ function getTestTruncateData()
'Plain text over the limit splitting first word' => array(
'Plain text',
3,
- true,
+ false,
true,
'Pla...',
),
@@ -127,28 +127,38 @@ function getTestTruncateData()
22,
true,
true,
- '<span>Plain text</span>...',
+ '<span>Plain</span>...',
),
+ // The tags by themselves make the string too long.
'Plain html over the limit by one word' => array(
'<span>Plain text</span>',
12,
true,
true,
- '<span>Plain</span>...',
+ '...',
),
+ // Don't return invalid HTML
'Plain html over the limit splitting first word' => array(
'<span>Plain text</span>',
- 10,
+ 1,
+ false,
true,
+ '...',
+ ),
+ // Don't return invalid HTML
+ 'Plain html over the limit splitting first word' => array(
+ '<span>Plain text</span>',
+ 4,
+ false,
true,
- '<span>Plai</span>...',
+ '...',
),
'Complex html over the limit' => array(
'<div><span><i>Plain</i> <b>text</b> foo</span></div>',
37,
true,
true,
- '<div><span><i>Plain</i></span></div>...',
+ '<div><span><i>Plain</i> <b>text</b></span></div>...',
),
'Complex html over the limit 2' => array(
'<div><span><i>Plain</i> <b>text</b> foo</span></div>',
@@ -166,10 +176,209 @@ function getTestTruncateData()
),
'HTML not allowed, no split' => array(
'<div><span><i>Plain</i> <b>text</b> foo</span></div>',
+ 4,
+ true,
+ false,
+ '...',
+ ),
+ 'First character is < with a maximum length of 1' => array(
+ '<div><span><i>Plain</i> <b>text</b> foo</span></div>',
+ 1,
+ true,
+ false,
+ '...',
+ ),
+ 'HTML not allowed, no split' => array(
+ '<div><span><i>Plain</i> <b>text</b> foo</span></div>',
+ 5,
+ true,
+ false,
+ '...',
+ ),
+ 'Text is the same as maxLength, no split, HTML allowed' => array(
+ '<div><span><i>Plain</i></span></div>',
+ 5,
+ true,
+ true,
+ '...',
+ ),
+ 'HTML not allowed, no split' => array(
+ '<div><span><i>Plain</i></span></div>',
+ 5,
+ true,
+ false,
+ 'Plain',
+ ),
+ );
+ }
+ /**
+ * Test cases for complex truncate.
+ *
+ * Each case is array(html, maxLength, noSplit, expected), the argument order
+ * that testTruncateComplex() declares.
+ *
+ * @return array
+ *
+ * @since 12.2
+ */
+ function getTestTruncateComplexData()
+ {
+ return array(
+
+ 'No change case' => array(
+ 'Plain text',
+ 10,
+ true,
+ 'Plain text'
+ ),
+ 'Plain text under the limit' => array(
+ 'Plain text',
+ 100,
+ true,
+ 'Plain text'
+ ),
+ 'Plain text at the limit' => array(
+ 'Plain text',
+ 10,
+ true,
+ 'Plain text'
+ ),
+ 'Plain text over the limit by two words' => array(
+ 'Plain text test',
+ 6,
+ true,
+ '...'
+ ),
+ 'Plain text over the limit by one word' => array(
+ 'Plain text test',
+ 13,
+ true,
+ 'Plain text...'
+ ),
+ 'Plain text over the limit with short trailing words' => array(
+ 'Plain text a b c d',
+ 13,
+ true,
+ 'Plain text...'
+ ),
+ 'Plain text over the limit splitting first word' => array(
+ 'Plain text',
+ 3,
+ false,
+ '...'
+ ),
+ 'Plain text with word split' => array(
+ 'Plain split-less',
+ 7,
+ true,
+ 'Plain...'
+ ),
+ 'Plain text under a short limit' => array(
+ 'Hi',
+ 3,
+ true,
+ 'Hi'
+ ),
+ 'Plain text with length 1 and a limit of 1' => array(
+ 'H',
+ 1,
+ true,
+ 'H'
+ ),
+ 'Plain html under the limit' => array(
+ '<span>Plain text</span>',
+ 100,
+ true,
+ '<span>Plain text</span>'
+ ),
+ 'Plain html at the limit' => array(
+ '<span>Plain text</span>',
+ 23,
+ true,
+ '<span>Plain text</span>'
+ ),
+ 'Plain html over the limit but under the text limit' => array(
+ '<span>Plain text</span>',
+ 22,
+ true,
+ '<span>Plain text</span>'
+ ),
+
+ 'Plain html over the limit by one word' => array(
+ '<span>Plain text</span>',
8,
true,
+ '<span>Plain</span>...'
+ ),
+ 'Plain html over the limit splitting first word' => array( // NOTE(review): same key as the next entry; PHP keeps only the last value for a repeated key, so this case never runs
+ '<span>Plain text</span>',
+ 4,
false,
- 'Plain...',
+ '<span>P</span>...'
+ ),
+ 'Plain html over the limit splitting first word' => array( // NOTE(review): repeats the previous entry's key and replaces it; give it a unique name
+ '<span>Plain text</span>',
+ 1,
+ false,
+ '<span></span>...'
+ ),
+ 'Complex html over the limit but under the text limit' => array(
+ '<div><span><i>Plain</i> <b>text</b> foo</span></div>',
+ 37,
+ true,
+ '<div><span><i>Plain</i> <b>text</b> foo</span></div>'
+ ),
+ 'Complex html over the limit 2' => array(
+ '<div><span><i>Plain</i> <b>text</b> foo</span></div>',
+ 38,
+ true,
+ '<div><span><i>Plain</i> <b>text</b> foo</span></div>'
+ ),
+ 'Split words' => array(
+ '<div><span><i>Plain</i> <b>text</b> foo</span></div>',
+ 8,
+ false,
+ '<div><span><i>Plain</i> <b>te</b></span></div>...'
+ ),
+ 'No split' => array(
+ '<div><span><i>Plain</i> <b>text</b> foo</span></div>',
+ 8,
+ true,
+ '<div><span><i>Plain</i></span></div>...'
+ ),
+ 'First character is < with a maximum length of 1, no split' => array(
+ '<div><span><i>Plain</i> <b>text</b> foo</span></div>',
+ 1,
+ true,
+ '<div></div>...'
+ ),
+ 'First character is < with a maximum length of 1, split' => array(
+ '<div><span><i>Plain</i> <b>text</b> foo</span></div>',
+ 1,
+ false,
+ '<div></div>...'
+ ),
+ 'Text is the same as maxLength, Complex HTML, no split' => array(
+ '<div><span><i>Plain</i></span></div>',
+ 5,
+ true,
+ '<div><span><i>Plain</i></span></div>'
+ ),
+ 'Text is all HTML' => array(
+ '<img src="myimage.jpg" />',
+ 5,
+ true,
+ '<img src="myimage.jpg" />'
+ ),
+ 'Text with no spaces, split, maxlength 3' => array(
+ 'thisistextwithnospace',
+ 3,
+ false,
+ '...'
+ ),
+ // From issue tracker, was creating infinite loop
+ 'Complex test from issue tracker' => array(
+ '<p class="mod-articles-category-introtext"><em>Bestas Review Magazine</em> featured <a href="http://viewer.zmags.com/publication/a1b0fbb9#/a1b0fbb9/28">something</a> else</p>',
+ 60,
+ false,
+ '<p class="mod-articles-category-introtext"><em>Bestas Review Magazine</em> featured <a href="http://viewer.zmags.com/publication/a1b0fbb9#/a1b0fbb9/28">something</a> else</p>'
),
);
}
@@ -216,4 +425,27 @@ public function testTruncate($text, $length, $noSplit, $allowedHtml, $expected)
$this->equalTo($expected)
);
}
+
+ /**
+ * Checks that JHtmlString::truncateComplex returns the expected markup.
+ *
+ * @param string $html The markup to truncate.
+ * @param integer $maxLength The maximum length passed to truncateComplex.
+ * @param boolean $noSplit Whether words must be kept whole at the cutoff.
+ * @param string $expected The expected result.
+ *
+ * @return void
+ *
+ * @dataProvider getTestTruncateComplexData
+ * @since 12.2
+ */
+ public function testTruncateComplex($html, $maxLength, $noSplit, $expected)
+ {
+     // Run the method under test first so the assertion reads as "actual versus expected".
+     $actual = JHtmlString::truncateComplex($html, $maxLength, $noSplit);
+
+     $this->assertThat($actual, $this->equalTo($expected));
+ }
}
Please sign in to comment.
Something went wrong with that request. Please try again.