From 759eb5f76e079022dc3342181d500e72285ee978 Mon Sep 17 00:00:00 2001 From: Barnaby Walters Date: Thu, 17 Jan 2013 20:21:14 +0000 Subject: [PATCH] Added HTML test for findUrls() --- BarnabyWalters/Helpers/Helpers.php | 13 +++++++------ tests/BarnabyWalters/Helpers/HelpersTest.php | 13 ++++++++++++- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/BarnabyWalters/Helpers/Helpers.php b/BarnabyWalters/Helpers/Helpers.php index 305f38e..fa3175a 100644 --- a/BarnabyWalters/Helpers/Helpers.php +++ b/BarnabyWalters/Helpers/Helpers.php @@ -461,20 +461,21 @@ public static function atomDate($date) { * @return array An array containing all the URLs found in $text */ public static function findUrls($text, $tidy = true) { - // Pattern is from 1 cassis.js, slightly modified to not look for twitter names - // E.G. beforehand it would return @tantek for @tantek.com. This function is just interested in addresses, not twitter stuff - $pattern = '/(?:(?:(?:(?:http|https|irc)?:\\/\\/(?:(?:[!$&-.0-9;=?A-Z_a-z]|(?:\\%[a-fA-F0-9]{2}))+(?:\\:(?:[!$&-.0-9;=?A-Z_a-z]|(?:\\%[a-fA-F0-9]{2}))+)?\\@)?)?(?:(?:(?:[a-zA-Z0-9][-a-zA-Z0-9]*\\.)+(?:(?:aero|arpa|asia|a[cdefgilmnoqrstuwxz])|(?:biz|b[abdefghijmnorstvwyz])|(?:cat|com|coop|c[acdfghiklmnoruvxyz])|d[ejkmoz]|(?:edu|e[cegrstu])|f[ijkmor]|(?:gov|g[abdefghilmnpqrstuwy])|h[kmnrtu]|(?:info|int|i[delmnoqrst])|j[emop]|k[eghimnrwyz]|l[abcikrstuvy]|(?:mil|museum|m[acdeghklmnopqrstuvwxyz])|(?:name|net|n[acefgilopruz])|(?:org|om)|(?:pro|p[aefghklmnrstwy])|qa|r[eouw]|s[abcdeghijklmnortuvyz]|(?:tel|travel|t[cdfghjklmnoprtvwz])|u[agkmsyz]|v[aceginu]|w[fs]|y[etu]|z[amw]))|(?:(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[0-9])\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[0-9])\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[0-9])))(?:\\:\\d{1,5})?)(?:\\/(?:(?:[!#&-;=?-Z_a-z~])|(?:\\%[a-fA-F0-9]{2}))*)?)(?=\\b|\\s|$)/'; + // Pattern is from 1 cassis.js, slightly modified to not look for + // twitter names E.G. beforehand it would return @tantek for @tantek.com. + // This function is just interested in addresses, not twitter stuff + $pattern = '/(?:(?:(?:(?:http|https|irc)?:\\/\\/(?:(?:[!$&-.0-9;=?A-Z_a-z]|(?:\\%[a-fA-F0-9]{2}))+(?:\\:(?:[!$&-.0-9;=?A-Z_a-z]|(?:\\%[a-fA-F0-9]{2}))+)?\\@)?)?(?:(?:(?:[a-zA-Z0-9][-a-zA-Z0-9]*\\.)+(?:(?:aero|arpa|asia|a[cdefgilmnoqrstuwxz])|(?:biz|b[abdefghijmnorstvwyz])|(?:cat|com|coop|c[acdfghiklmnoruvxyz])|d[ejkmoz]|(?:edu|e[cegrstu])|f[ijkmor]|(?:gov|g[abdefghilmnpqrstuwy])|h[kmnrtu]|(?:info|int|i[delmnoqrst])|j[emop]|k[eghimnrwyz]|l[abcikrstuvy]|(?:mil|museum|m[acdeghklmnopqrstuvwxyz])|(?:name|net|n[acefgilopruz])|(?:org|om)|(?:pro|p[aefghklmnrstwy])|qa|r[eouw]|s[abcdeghijklmnortuvyz]|(?:tel|travel|t[cdfghjklmnoprtvwz])|u[agkmsyz]|v[aceginu]|w[fs]|y[etu]|z[amw]))|(?:(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[0-9])\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[0-9])\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[0-9])))(?:\\:\\d{1,5})?)(?:\\/(?:(?:[!#&-;=?-Z_a-z~])|(?:\\%[a-fA-F0-9]{2}))*)?)(?=\\b|\\s|$)/i'; $c = preg_match_all($pattern, $text, $m); - if ($c) { + if ($c !== false) { // Normalise $links = array_values($m[0]); ob_start(); $links = array_map(function($value) use ($tidy) { - return $tidy ? \web_address_to_uri($value, true) : $value; - }, $links); + return $tidy ? \web_address_to_uri($value, true) : $value; + }, $links); ob_end_clean(); // $links = ['http://someurl.tld', •••] diff --git a/tests/BarnabyWalters/Helpers/HelpersTest.php b/tests/BarnabyWalters/Helpers/HelpersTest.php index 14edc33..51079a4 100644 --- a/tests/BarnabyWalters/Helpers/HelpersTest.php +++ b/tests/BarnabyWalters/Helpers/HelpersTest.php @@ -279,11 +279,22 @@ public function testAtomDate() { * @group text * @group helpers */ - public function testFindURLs() { + public function testFindUrlsFindsUrlsInPlaintext() { $testString = 'Okay, so this string contains some URLs. http://waterpigs.co.uk, identi.ca, aaron.pk'; $testArray = array('http://waterpigs.co.uk', 'http://identi.ca', 'http://aaron.pk'); + $this->assertEquals($testArray, H::findUrls($testString)); } + + public function testFindUrlsFindsUrlsInAHref() { + $html = '
+

T5 mentioning another note

+
'; + $expected = ['http://localhost.com/notes/711']; + $result = H::findUrls($html); + + $this->assertEquals($expected, $result); + } }