Skip to content

Commit

Permalink
Added HTML test for findUrls()
Browse files Browse the repository at this point in the history
  • Loading branch information
barnabywalters committed Jan 17, 2013
1 parent 4bba050 commit 759eb5f
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 7 deletions.
13 changes: 7 additions & 6 deletions BarnabyWalters/Helpers/Helpers.php
Expand Up @@ -461,20 +461,21 @@ public static function atomDate($date) {
* @return array An array containing all the URLs found in $text
*/
public static function findUrls($text, $tidy = true) {
// Pattern is from 1 cassis.js, slightly modified to not look for twitter names
// E.G. beforehand it would return @tantek for @tantek.com. This function is just interested in addresses, not twitter stuff
$pattern = '/(?:(?:(?:(?:http|https|irc)?:\\/\\/(?:(?:[!$&-.0-9;=?A-Z_a-z]|(?:\\%[a-fA-F0-9]{2}))+(?:\\:(?:[!$&-.0-9;=?A-Z_a-z]|(?:\\%[a-fA-F0-9]{2}))+)?\\@)?)?(?:(?:(?:[a-zA-Z0-9][-a-zA-Z0-9]*\\.)+(?:(?:aero|arpa|asia|a[cdefgilmnoqrstuwxz])|(?:biz|b[abdefghijmnorstvwyz])|(?:cat|com|coop|c[acdfghiklmnoruvxyz])|d[ejkmoz]|(?:edu|e[cegrstu])|f[ijkmor]|(?:gov|g[abdefghilmnpqrstuwy])|h[kmnrtu]|(?:info|int|i[delmnoqrst])|j[emop]|k[eghimnrwyz]|l[abcikrstuvy]|(?:mil|museum|m[acdeghklmnopqrstuvwxyz])|(?:name|net|n[acefgilopruz])|(?:org|om)|(?:pro|p[aefghklmnrstwy])|qa|r[eouw]|s[abcdeghijklmnortuvyz]|(?:tel|travel|t[cdfghjklmnoprtvwz])|u[agkmsyz]|v[aceginu]|w[fs]|y[etu]|z[amw]))|(?:(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[0-9])\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[0-9])\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[0-9])))(?:\\:\\d{1,5})?)(?:\\/(?:(?:[!#&-;=?-Z_a-z~])|(?:\\%[a-fA-F0-9]{2}))*)?)(?=\\b|\\s|$)/';
// Pattern is from 1 cassis.js, slightly modified to not look for
// twitter names E.G. beforehand it would return @tantek for @tantek.com.
// This function is just interested in addresses, not twitter stuff
$pattern = '/(?:(?:(?:(?:http|https|irc)?:\\/\\/(?:(?:[!$&-.0-9;=?A-Z_a-z]|(?:\\%[a-fA-F0-9]{2}))+(?:\\:(?:[!$&-.0-9;=?A-Z_a-z]|(?:\\%[a-fA-F0-9]{2}))+)?\\@)?)?(?:(?:(?:[a-zA-Z0-9][-a-zA-Z0-9]*\\.)+(?:(?:aero|arpa|asia|a[cdefgilmnoqrstuwxz])|(?:biz|b[abdefghijmnorstvwyz])|(?:cat|com|coop|c[acdfghiklmnoruvxyz])|d[ejkmoz]|(?:edu|e[cegrstu])|f[ijkmor]|(?:gov|g[abdefghilmnpqrstuwy])|h[kmnrtu]|(?:info|int|i[delmnoqrst])|j[emop]|k[eghimnrwyz]|l[abcikrstuvy]|(?:mil|museum|m[acdeghklmnopqrstuvwxyz])|(?:name|net|n[acefgilopruz])|(?:org|om)|(?:pro|p[aefghklmnrstwy])|qa|r[eouw]|s[abcdeghijklmnortuvyz]|(?:tel|travel|t[cdfghjklmnoprtvwz])|u[agkmsyz]|v[aceginu]|w[fs]|y[etu]|z[amw]))|(?:(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[0-9])\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[0-9])\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[0-9])))(?:\\:\\d{1,5})?)(?:\\/(?:(?:[!#&-;=?-Z_a-z~])|(?:\\%[a-fA-F0-9]{2}))*)?)(?=\\b|\\s|$)/i';

$c = preg_match_all($pattern, $text, $m);

if ($c) {
if ($c !== false) {
// Normalise
$links = array_values($m[0]);

ob_start();
$links = array_map(function($value) use ($tidy) {
return $tidy ? \web_address_to_uri($value, true) : $value;
}, $links);
return $tidy ? \web_address_to_uri($value, true) : $value;
}, $links);
ob_end_clean();

// $links = ['http://someurl.tld', •••]
Expand Down
13 changes: 12 additions & 1 deletion tests/BarnabyWalters/Helpers/HelpersTest.php
Expand Up @@ -279,11 +279,22 @@ public function testAtomDate() {
* @group text
* @group helpers
*/
public function testFindURLs() {
public function testFindUrlsFindsUrlsInPlaintext() {
    // Plain text mixing one fully-qualified URL with two bare domain names.
    $plaintext = 'Okay, so this string contains some URLs. http://waterpigs.co.uk, identi.ca, aaron.pk';

    // Every address is expected back in input order; the bare domains are
    // expected to come back with an http:// scheme prepended.
    $expectedUrls = ['http://waterpigs.co.uk', 'http://identi.ca', 'http://aaron.pk'];

    $foundUrls = H::findUrls($plaintext);

    $this->assertEquals($expectedUrls, $foundUrls);
}

public function testFindUrlsFindsUrlsInAHref() {
    // Markup whose only absolute URL sits inside an href attribute; the
    // other hrefs are site-relative tag links.
    $markup = '<div class="p-name entry-title p-summary summary e-content entry-content">
<p>T5 <a rel="tag" href="/tags/pingback">#pingback</a> mentioning <a href="http://localhost.com/notes/711/">another note</a> <a rel="tag" href="/tags/test">#test</a> <a rel="tag" href="/tags/notweet">#notweet</a></p>
</div>';

    // The expected URL has no trailing slash even though the href has one.
    $this->assertEquals(
        ['http://localhost.com/notes/711'],
        H::findUrls($markup)
    );
}

}

Expand Down

0 comments on commit 759eb5f

Please sign in to comment.