Skip to content
Browse files

Added HTML test for findUrls()

  • Loading branch information...
1 parent 4bba050 commit 759eb5f76e079022dc3342181d500e72285ee978 @barnabywalters committed
Showing with 19 additions and 7 deletions.
  1. +7 −6 BarnabyWalters/Helpers/Helpers.php
  2. +12 −1 tests/BarnabyWalters/Helpers/HelpersTest.php
View
13 BarnabyWalters/Helpers/Helpers.php
@@ -461,20 +461,21 @@ public static function atomDate($date) {
* @return array An array containing all the URLs found in $text
*/
public static function findUrls($text, $tidy = true) {
- // Pattern is from 1 cassis.js, slightly modified to not look for twitter names
- // E.G. beforehand it would return @tantek for @tantek.com. This function is just interested in addresses, not twitter stuff
- $pattern = '/(?:(?:(?:(?:http|https|irc)?:\\/\\/(?:(?:[!$&-.0-9;=?A-Z_a-z]|(?:\\%[a-fA-F0-9]{2}))+(?:\\:(?:[!$&-.0-9;=?A-Z_a-z]|(?:\\%[a-fA-F0-9]{2}))+)?\\@)?)?(?:(?:(?:[a-zA-Z0-9][-a-zA-Z0-9]*\\.)+(?:(?:aero|arpa|asia|a[cdefgilmnoqrstuwxz])|(?:biz|b[abdefghijmnorstvwyz])|(?:cat|com|coop|c[acdfghiklmnoruvxyz])|d[ejkmoz]|(?:edu|e[cegrstu])|f[ijkmor]|(?:gov|g[abdefghilmnpqrstuwy])|h[kmnrtu]|(?:info|int|i[delmnoqrst])|j[emop]|k[eghimnrwyz]|l[abcikrstuvy]|(?:mil|museum|m[acdeghklmnopqrstuvwxyz])|(?:name|net|n[acefgilopruz])|(?:org|om)|(?:pro|p[aefghklmnrstwy])|qa|r[eouw]|s[abcdeghijklmnortuvyz]|(?:tel|travel|t[cdfghjklmnoprtvwz])|u[agkmsyz]|v[aceginu]|w[fs]|y[etu]|z[amw]))|(?:(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[0-9])\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[0-9])\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[0-9])))(?:\\:\\d{1,5})?)(?:\\/(?:(?:[!#&-;=?-Z_a-z~])|(?:\\%[a-fA-F0-9]{2}))*)?)(?=\\b|\\s|$)/';
+ // Pattern is from cassis.js, slightly modified so that it does not match
+ // Twitter names; e.g. it previously returned @tantek for @tantek.com.
+ // This function is only interested in addresses, not Twitter handles.
+ $pattern = '/(?:(?:(?:(?:http|https|irc)?:\\/\\/(?:(?:[!$&-.0-9;=?A-Z_a-z]|(?:\\%[a-fA-F0-9]{2}))+(?:\\:(?:[!$&-.0-9;=?A-Z_a-z]|(?:\\%[a-fA-F0-9]{2}))+)?\\@)?)?(?:(?:(?:[a-zA-Z0-9][-a-zA-Z0-9]*\\.)+(?:(?:aero|arpa|asia|a[cdefgilmnoqrstuwxz])|(?:biz|b[abdefghijmnorstvwyz])|(?:cat|com|coop|c[acdfghiklmnoruvxyz])|d[ejkmoz]|(?:edu|e[cegrstu])|f[ijkmor]|(?:gov|g[abdefghilmnpqrstuwy])|h[kmnrtu]|(?:info|int|i[delmnoqrst])|j[emop]|k[eghimnrwyz]|l[abcikrstuvy]|(?:mil|museum|m[acdeghklmnopqrstuvwxyz])|(?:name|net|n[acefgilopruz])|(?:org|om)|(?:pro|p[aefghklmnrstwy])|qa|r[eouw]|s[abcdeghijklmnortuvyz]|(?:tel|travel|t[cdfghjklmnoprtvwz])|u[agkmsyz]|v[aceginu]|w[fs]|y[etu]|z[amw]))|(?:(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[0-9])\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[0-9])\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[0-9])))(?:\\:\\d{1,5})?)(?:\\/(?:(?:[!#&-;=?-Z_a-z~])|(?:\\%[a-fA-F0-9]{2}))*)?)(?=\\b|\\s|$)/i';
$c = preg_match_all($pattern, $text, $m);
- if ($c) {
+ if ($c !== false) {
// Normalise
$links = array_values($m[0]);
ob_start();
$links = array_map(function($value) use ($tidy) {
- return $tidy ? \web_address_to_uri($value, true) : $value;
- }, $links);
+ return $tidy ? \web_address_to_uri($value, true) : $value;
+ }, $links);
ob_end_clean();
// $links = ['http://someurl.tld', •••]
View
13 tests/BarnabyWalters/Helpers/HelpersTest.php
@@ -279,11 +279,22 @@ public function testAtomDate() {
* @group text
* @group helpers
*/
- public function testFindURLs() {
+ public function testFindUrlsFindsUrlsInPlaintext() {
$testString = 'Okay, so this string contains some URLs. http://waterpigs.co.uk, identi.ca, aaron.pk';
$testArray = array('http://waterpigs.co.uk', 'http://identi.ca', 'http://aaron.pk');
+
$this->assertEquals($testArray, H::findUrls($testString));
}
+
+ public function testFindUrlsFindsUrlsInAHref() {
+ $html = '<div class="p-name entry-title p-summary summary e-content entry-content">
+ <p>T5 <a rel="tag" href="/tags/pingback">#pingback</a> mentioning <a href="http://localhost.com/notes/711/">another note</a> <a rel="tag" href="/tags/test">#test</a> <a rel="tag" href="/tags/notweet">#notweet</a></p>
+ </div>';
+ $expected = ['http://localhost.com/notes/711'];
+ $result = H::findUrls($html);
+
+ $this->assertEquals($expected, $result);
+ }
}

0 comments on commit 759eb5f

Please sign in to comment.
Something went wrong with that request. Please try again.