Skip to content

Commit

Permalink
Moved truncenator to its own file, same for its tests. Set up a tes…
Browse files Browse the repository at this point in the history
…t bootstrap to autoload stuff. Made template classes for Posse Helpers and their tests
  • Loading branch information
barnabywalters committed Nov 13, 2012
1 parent 5d42414 commit ddae829
Show file tree
Hide file tree
Showing 10 changed files with 486 additions and 434 deletions.
84 changes: 0 additions & 84 deletions BarnabyWalters/Helpers/Helpers.php
Expand Up @@ -51,90 +51,6 @@ public static function authorFromUri($uri) {
}
}

/**
 * ~ THE TRUNCENATOR ~
 *
 * Takes a string (tweet-like note) and some config params, and produces a version that is
 * truncated at a word boundary, followed by the ellipsis and (if given) the canonical URI.
 * When no truncation is needed the canonical URI is simply appended to the text.
 *
 * Side effect: sets the mbstring internal encoding to UTF-8.
 *
 * @param string $string The string to be truncated
 * @param int $length The maximum length of the output
 * @param string $uri The canonical URI of the post, to be added to the end (optional)
 * @param int $urilen Treat any URLs as if they were this length. Only applied when an integer is given
 * @param bool $parens If truncation is not required, surround the canonical link with parens
 * @param string $ellipsis The string to append in the case of truncation
 * @param int $hastags (sic) Number of hashtags to preserve if truncation occurs. Currently unused;
 *                     the misspelt name is kept so the signature stays backwards compatible
 *
 * @return string The truncated string
 * @todo Hashtag preservation is not implemented
 * @todo If adding the space and brackets pushes an untruncated string over $length, remove
 *       enough words to compensate (and write an edge-case test for that scenario)
 */
public static function truncate($string, $length = 140, $uri = null, $urilen = null, $parens = true, $ellipsis = '…', $hastags = 1) {
    mb_internal_encoding('UTF-8');

    $hasUri = !empty($uri);

    // Suffix that gets appended if truncation occurs
    $append = $ellipsis;
    if ($hasUri)
        $append .= ' ' . $uri;

    // If $urilen is set, swap every URL in the text for a placeholder that is $urilen chars long,
    // so that all later length arithmetic sees URLs at their "counted" length
    $uris = array();
    if (is_int($urilen)) {
        // "$tidy = false" only labels the argument; the value passed is false
        foreach (Helpers::findUrls($string, $tidy = false) as $key => $url) {
            $dummy = 'URL' . $key;
            // max() guards against a negative repeat count when $urilen is shorter than the label
            $dummy .= str_repeat('X', max(0, $urilen - mb_strlen($dummy)));
            $uris[$dummy] = $url;
            $string = str_replace($url, $dummy, $string);
        }
    }

    // Collect the words of the text as [start, end] pairs measured in characters.
    // The "u" modifier makes the pattern UTF-8 aware, so accented words are not split.
    $matches = array();
    $words = array();
    if (preg_match_all('/\b\w+\b/u', $string, $matches, PREG_OFFSET_CAPTURE)) {
        foreach ($matches[0] as $match) {
            // PREG_OFFSET_CAPTURE reports BYTE offsets; convert to a character offset
            // because the cut below is made with mb_substr()
            $start = mb_strlen(substr($string, 0, $match[1]));
            $words[] = array($start, $start + mb_strlen($match[0]));
        }
    }

    // Room left for the text itself once the suffix has been accounted for.
    // NOTE(review): uriMbStrlen() presumably counts URLs as $urilen chars; confirm against the helper
    $maxplainlen = $length - Helpers::uriMbStrlen($append, $urilen);

    if (Helpers::uriMbStrlen($string, $urilen) > $maxplainlen) {
        // Truncation required: cut after the last word that still fits completely
        $plaintargetlen = null;
        $previousEnd = 0;
        foreach ($words as $word) {
            if ($word[1] > $maxplainlen) {
                // This word and everything after it must go. If it is the very first
                // word, nothing fits and the cut position is 0
                $plaintargetlen = $previousEnd;
                break;
            }
            $previousEnd = $word[1];
        }

        // No word crossed the limit (e.g. trailing punctuation): cut hard at the limit
        if ($plaintargetlen === null)
            $plaintargetlen = $maxplainlen;

        // Clamp so that a negative limit cannot make mb_substr() trim from the end instead
        $trunc = mb_substr($string, 0, max(0, $plaintargetlen)) . $append;
    }
    else {
        // No truncation required: just append the canonical URI, if there is one
        if (!$hasUri)
            $trunc = $string;
        elseif ($parens)
            $trunc = $string . ' (' . $uri . ')';
        else
            $trunc = $string . ' ' . $uri;
    }

    // Expand the placeholders back into the original URLs
    foreach ($uris as $dummy => $original) {
        $trunc = str_replace($dummy, $original, $trunc);
    }

    return $trunc;
}

/**
* Replace <img> elements with their @href
*
Expand Down
14 changes: 14 additions & 0 deletions BarnabyWalters/Posse/Helpers.php
@@ -0,0 +1,14 @@
<?php

namespace BarnabyWalters\Posse;

/**
 * Placeholder for POSSE-related helper functions.
 *
 * The class currently declares no members.
 *
 * @author barnabywalters
 */
class Helpers {

}

// EOF
99 changes: 99 additions & 0 deletions BarnabyWalters/Posse/Truncenator.php
@@ -0,0 +1,99 @@
<?php

namespace BarnabyWalters\Posse;

use BarnabyWalters\Helpers\Helpers as H;

/**
 * THE TRUNCENATOR
 *
 * Shortens tweet-like notes to a maximum length for syndication, see {@see Truncenator::truncate()}.
 *
 * @author Barnaby Walters
 */
class Truncenator {

    /**
     * ~ THE TRUNCENATOR ~
     *
     * Takes a string (tweet-like note) and some config params, and produces a version that is
     * truncated at a word boundary, followed by the ellipsis and (if given) the canonical URI.
     * When no truncation is needed the canonical URI is simply appended to the text.
     *
     * Side effect: sets the mbstring internal encoding to UTF-8.
     *
     * @param string $string The string to be truncated
     * @param int $length The maximum length of the output
     * @param string $uri The canonical URI of the post, to be added to the end (optional)
     * @param int $urilen Treat any URLs as if they were this length. Only applied when an integer is given
     * @param bool $parens If truncation is not required, surround the canonical link with parens
     * @param string $ellipsis The string to append in the case of truncation
     * @param int $hastags (sic) Number of hashtags to preserve if truncation occurs. Currently unused;
     *                     the misspelt name is kept so the signature stays backwards compatible
     *
     * @return string The truncated string
     * @todo Hashtag preservation is not implemented
     * @todo If adding the space and brackets pushes an untruncated string over $length, remove
     *       enough words to compensate (and write an edge-case test for that scenario)
     */
    public static function truncate($string, $length = 140, $uri = null, $urilen = null, $parens = true, $ellipsis = '…', $hastags = 1) {
        mb_internal_encoding('UTF-8');

        $hasUri = !empty($uri);

        // Suffix that gets appended if truncation occurs
        $append = $ellipsis;
        if ($hasUri)
            $append .= ' ' . $uri;

        // If $urilen is set, swap every URL in the text for a placeholder that is $urilen chars long,
        // so that all later length arithmetic sees URLs at their "counted" length
        $uris = array();
        if (is_int($urilen)) {
            // "$tidy = false" only labels the argument; the value passed is false
            foreach (H::findUrls($string, $tidy = false) as $key => $url) {
                $dummy = 'URL' . $key;
                // max() guards against a negative repeat count when $urilen is shorter than the label
                $dummy .= str_repeat('X', max(0, $urilen - mb_strlen($dummy)));
                $uris[$dummy] = $url;
                $string = str_replace($url, $dummy, $string);
            }
        }

        // Collect the words of the text as [start, end] pairs measured in characters.
        // The "u" modifier makes the pattern UTF-8 aware, so accented words are not split.
        $matches = array();
        $words = array();
        if (preg_match_all('/\b\w+\b/u', $string, $matches, PREG_OFFSET_CAPTURE)) {
            foreach ($matches[0] as $match) {
                // PREG_OFFSET_CAPTURE reports BYTE offsets; convert to a character offset
                // because the cut below is made with mb_substr()
                $start = mb_strlen(substr($string, 0, $match[1]));
                $words[] = array($start, $start + mb_strlen($match[0]));
            }
        }

        // Room left for the text itself once the suffix has been accounted for.
        // NOTE(review): uriMbStrlen() presumably counts URLs as $urilen chars; confirm against the helper
        $maxplainlen = $length - H::uriMbStrlen($append, $urilen);

        if (H::uriMbStrlen($string, $urilen) > $maxplainlen) {
            // Truncation required: cut after the last word that still fits completely
            $plaintargetlen = null;
            $previousEnd = 0;
            foreach ($words as $word) {
                if ($word[1] > $maxplainlen) {
                    // This word and everything after it must go. If it is the very first
                    // word, nothing fits and the cut position is 0
                    $plaintargetlen = $previousEnd;
                    break;
                }
                $previousEnd = $word[1];
            }

            // No word crossed the limit (e.g. trailing punctuation): cut hard at the limit
            if ($plaintargetlen === null)
                $plaintargetlen = $maxplainlen;

            // Clamp so that a negative limit cannot make mb_substr() trim from the end instead
            $trunc = mb_substr($string, 0, max(0, $plaintargetlen)) . $append;
        }
        else {
            // No truncation required: just append the canonical URI, if there is one
            if (!$hasUri)
                $trunc = $string;
            elseif ($parens)
                $trunc = $string . ' (' . $uri . ')';
            else
                $trunc = $string . ' ' . $uri;
        }

        // Expand the placeholders back into the original URLs
        foreach ($uris as $dummy => $original) {
            $trunc = str_replace($dummy, $original, $trunc);
        }

        return $trunc;
    }
}

// EOF
6 changes: 3 additions & 3 deletions nbproject/project.properties
@@ -1,8 +1,8 @@
include.path=${php.global.include.path}
php.version=PHP_54
phpunit.bootstrap=
phpunit.bootstrap.create.tests=false
phpunit.configuration=
phpunit.bootstrap=tests/bootstrap.php
phpunit.bootstrap.create.tests=true
phpunit.configuration=phpunit.xml
phpunit.run.test.files=true
phpunit.script=
phpunit.suite=
Expand Down
21 changes: 11 additions & 10 deletions phpunit.xml
@@ -1,4 +1,5 @@
<phpunit backupGlobals="true"
bootstrap="Tests/bootstrap.php"
backupStaticAttributes="false"
cacheTokens="false"
colors="false"
Expand All @@ -15,14 +16,14 @@
stopOnSkipped="false"
strict="false"
verbose="false">
<testsuites>
<testsuite name="Test Suite">
<directory suffix="Test.php">tests/</directory>
</testsuite>
</testsuites>
<logging>
<log type="coverage-html" target="tmp/report" charset="UTF-8"
highlight="true"/>
<log type="testdox-html" target="tmp/index.html"/>
</logging>
<testsuites>
<testsuite name="Test Suite">
<directory suffix="Test.php">Tests/</directory>
</testsuite>
</testsuites>
<logging>
<log type="coverage-html" target="tmp/report" charset="UTF-8"
highlight="true"/>
<log type="testdox-html" target="tmp/index.html"/>
</logging>
</phpunit>

0 comments on commit ddae829

Please sign in to comment.