Permalink
Browse files

Extract and autolink urls without protocol with a dashed domain or subdomain.
  • Loading branch information...
1 parent 93936f2 commit 6b8e6ee18057ce4051aec19f5c95994179a2e4aa @arttu arttu committed Jun 21, 2012
Showing with 90 additions and 75 deletions.
  1. +26 −31 flowdock-text.js
  2. +1 −1 test/conformance.js
  3. +34 −38 test/conformance_tests/autolink.yml
  4. +29 −5 test/conformance_tests/extract.yml
View
@@ -181,8 +181,7 @@ if (typeof FlowdockText === "undefined" || FlowdockText === null) {
FlowdockText.regexen.validPunycode = regexSupplant(/(?:xn--[0-9a-z]+)/);
FlowdockText.regexen.validDomain = regexSupplant(/(?:#{validSubdomain}*#{validDomainName}(?:#{validGTLD}|#{validCCTLD}|#{validPunycode}))/);
FlowdockText.regexen.pseudoValidIP = regexSupplant(/(?:\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/);
- FlowdockText.regexen.validAsciiDomain = regexSupplant(/(?:(?:[a-z0-9#{latinAccentChars}]+)\.)+(?:#{validGTLD}|#{validCCTLD}|#{validPunycode})/gi);
- FlowdockText.regexen.invalidShortDomain = regexSupplant(/^#{validDomainName}#{validCCTLD}$/);
+ FlowdockText.regexen.validAsciiDomain = regexSupplant(/(?:(?:[a-z0-9#{latinAccentChars}\-]+)\.)+(?:#{validGTLD}|#{validCCTLD}|#{validPunycode})/gi);
FlowdockText.regexen.validPortNumber = regexSupplant(/[0-9]+/);
@@ -387,35 +386,34 @@ if (typeof FlowdockText === "undefined" || FlowdockText === null) {
return text.replace(FlowdockText.regexen.extractUrl, function(match, all, before, url, protocol, port, domain, path, queryString) {
var tldComponents;
- if (protocol) {
- var htmlAttrs = "";
- var after = "";
- for (var k in options) {
- htmlAttrs += stringSupplant(" #{k}=\"#{v}\" ", {k: k, v: options[k].toString().replace(/"/, "&quot;").replace(/</, "&lt;").replace(/>/, "&gt;")});
- }
-
- // In the case of t.co URLs, don't allow additional path characters.
- if (url.match(FlowdockText.regexen.validTcoUrl)) {
- url = RegExp.lastMatch;
- after = RegExp.rightContext;
- }
+ var htmlAttrs = "";
+ var after = "";
+ for (var k in options) {
+ htmlAttrs += stringSupplant(" #{k}=\"#{v}\" ", {k: k, v: options[k].toString().replace(/"/, "&quot;").replace(/</, "&lt;").replace(/>/, "&gt;")});
+ }
- var d = {
- before: before,
- htmlAttrs: htmlAttrs,
- url: FlowdockText.htmlEscape(url),
- after: after
- };
- if (urlEntities && urlEntities[url] && urlEntities[url].display_url) {
- d.displayUrl = FlowdockText.htmlEscape(urlEntities[url].display_url);
- } else {
- d.displayUrl = d.url;
- }
+ // In the case of t.co URLs, don't allow additional path characters.
+ if (url.match(FlowdockText.regexen.validTcoUrl)) {
+ url = RegExp.lastMatch;
+ after = RegExp.rightContext;
+ }
- return stringSupplant("#{before}<a href=\"#{url}\"#{htmlAttrs}>#{displayUrl}</a>#{after}", d);
+ var d = {
+ before: before,
+ htmlAttrs: htmlAttrs,
+ url: FlowdockText.htmlEscape(url),
+ after: after
+ };
+ if (urlEntities && urlEntities[url] && urlEntities[url].display_url) {
+ d.displayUrl = FlowdockText.htmlEscape(urlEntities[url].display_url);
} else {
- return all;
+ d.displayUrl = d.url;
}
+
+ if (!protocol) {
+ d.url = 'http://' + d.url;
+ }
+ return stringSupplant("#{before}<a href=\"#{url}\"#{htmlAttrs}>#{displayUrl}</a>#{after}", d);
});
};
@@ -525,10 +523,7 @@ if (typeof FlowdockText === "undefined" || FlowdockText === null) {
url: asciiDomain,
indices: [startPosition + asciiStartPosition, startPosition + asciiEndPosition]
}
- lastUrlInvalidMatch = asciiDomain.match(FlowdockText.regexen.invalidShortDomain);
- if (!lastUrlInvalidMatch) {
- urls.push(lastUrl);
- }
+ urls.push(lastUrl);
});
// no ASCII-only domain found. Skip the entire URL.
View

Large diffs are not rendered by default.

Oops, something went wrong.
@@ -374,6 +374,22 @@ tests:
text: "text http://example.com,"
expected: "text <a href=\"http://example.com\">http://example.com</a>,"
+ - description: "Autolink url without the protocol and with a dash in subdomain"
+ text: "text ex-ample.com,"
+ expected: "text <a href=\"http://ex-ample.com\">ex-ample.com</a>,"
+
+ - description: "Autolink url without the protocol and with subdomain and with a dash in subdomain"
+ text: "text ex-ample.example.com,"
+ expected: "text <a href=\"http://ex-ample.example.com\">ex-ample.example.com</a>,"
+
+ - description: "Autolink url without the protocol and with a CCTLD and with a dash in subdomain"
+ text: "text ex-ample.fi,"
+ expected: "text <a href=\"http://ex-ample.fi\">ex-ample.fi</a>,"
+
+ - description: "Autolink url without the protocol and with a CCTLD, subdomain and a dash in subdomain"
+ text: "text ex-ample.example.fi,"
+ expected: "text <a href=\"http://ex-ample.example.fi\">ex-ample.example.fi</a>,"
+
- description: "Autolink url with path followed by a comma (wihout the comma)"
text: "In http://example.com/test, Douglas explains 42."
expected: "In <a href=\"http://example.com/test\">http://example.com/test</a>, Douglas explains 42."
@@ -458,45 +474,37 @@ tests:
text: "http://build.lan"
expected: "<a href=\"http://build.lan\">http://build.lan</a>"
- - description: "DO NOT Autolink url without protocol (with www)"
+ - description: "Autolink url without protocol (with www)"
text: "www.example.biz"
- expected: "www.example.biz"
+ expected: "<a href=\"http://www.example.biz\">www.example.biz</a>"
- - description: "DO NOT Autolink url without protocol (with WWW)"
+ - description: "Autolink url without protocol (with WWW)"
text: "WWW.EXAMPLE.BIZ"
- expected: "WWW.EXAMPLE.BIZ"
+ expected: "<a href=\"http://WWW.EXAMPLE.BIZ\">WWW.EXAMPLE.BIZ</a>"
- - description: "DO NOT Autolink URL without protocol and without www (ending in .com)"
+ - description: "Autolink URL without protocol and without www (ending in .com)"
text: "foo.com"
- expected: "foo.com"
+ expected: "<a href=\"http://foo.com\">foo.com</a>"
- - description: "DO NOT Autolink URL without protocol and without www (ending in .org)"
+ - description: "Autolink URL without protocol and without www (ending in .org)"
text: "foo.org"
- expected: "foo.org"
+ expected: "<a href=\"http://foo.org\">foo.org</a>"
- - description: "DO NOT Autolink URL without protocol and without www (ending in .net)"
+ - description: "Autolink URL without protocol and without www (ending in .net)"
text: "foo.net"
- expected: "foo.net"
+ expected: "<a href=\"http://foo.net\">foo.net</a>"
- - description: "DO NOT Autolink URL without protocol and without www (ending in .gov)"
+ - description: "Autolink URL without protocol and without www (ending in .gov)"
text: "foo.gov"
- expected: "foo.gov"
+ expected: "<a href=\"http://foo.gov\">foo.gov</a>"
- - description: "DO NOT Autolink URL without protocol and without www (ending in .edu)"
+ - description: "Autolink URL without protocol and without www (ending in .edu)"
text: "foo.edu"
- expected: "foo.edu"
-
- - description: "Autolink URL without protocol and without www not ending in /.(edu|com|gov|org|net)/"
- text: "foo.it twitter.co.jp foo.commerce foo.nettastic foo.us foo.co.uk"
- expected: "foo.it twitter.co.jp foo.commerce foo.nettastic foo.us foo.co.uk"
+ expected: "<a href=\"http://foo.edu\">foo.edu</a>"
- - description: "Multiple URLs with different protocols but not without a protocol"
+ - description: "Multiple URLs with different protocols"
text: "http://foo.com AND https://bar.com AND www.foobar.com"
- expected: "<a href=\"http://foo.com\">http://foo.com</a> AND <a href=\"https://bar.com\">https://bar.com</a> AND www.foobar.com"
-
- - description: "Autolink raw domain followed by domain only links the first"
- text: "See http://example.com example.com"
- expected: "See <a href=\"http://example.com\">http://example.com</a> example.com"
+ expected: "<a href=\"http://foo.com\">http://foo.com</a> AND <a href=\"https://bar.com\">https://bar.com</a> AND <a href=\"http://www.foobar.com\">www.foobar.com</a>"
- description: "Autolink url that includes @-sign and numeric dir under it"
text: "http://www.flickr.com/photos/29674651@N00/4382024406"
@@ -534,10 +542,6 @@ tests:
text: "Is www.-foo.com a valid URL?"
expected: "Is www.-foo.com a valid URL?"
- - description: "Autolink URL should NOT autolink a domain with a valid dash but no protocol"
- text: "Is www.foo-bar.com a valid URL?"
- expected: "Is www.foo-bar.com a valid URL?"
-
- description: "Autolink URL should autolink a domain with a valid dash and a protocol"
text: "Is http://www.foo-bar.com a valid URL?"
expected: "Is <a href=\"http://www.foo-bar.com\">http://www.foo-bar.com</a> a valid URL?"
@@ -566,21 +570,13 @@ tests:
text: "Go to http://example.com/view/slug-url-?foo=bar"
expected: "Go to <a href=\"http://example.com/view/slug-url-?foo=bar\">http://example.com/view/slug-url-?foo=bar</a>"
- - description: "Autolink URL should NOT link URLs with domains beginning in a space"
- text: "@user Try http:// example.com/path"
- expected: "@user Try http:// example.com/path"
-
- - description: "Autolink URL should NOT link URLs with domains beginning in a non-breaking space (U+00A0)"
- text: "@user Try http:// example.com/path"
- expected: "@user Try http:// example.com/path"
-
- description: "Autolink URL should link paths containing accented characters"
text: "See: http://example.com/café"
expected: "See: <a href=\"http://example.com/café\">http://example.com/café</a>"
- - description: "Autolink URL should not link URL without protocol"
+ - description: "Autolink URL should link URL without protocol"
text: "See: www.twitter.com or twitter.com/twitter"
- expected: "See: www.twitter.com or twitter.com/twitter"
+ expected: "See: <a href=\"http://www.twitter.com\">www.twitter.com</a> or <a href=\"http://twitter.com/twitter\">twitter.com/twitter</a>"
- description: "Autolink t.co URL followed by punctuation"
text: "See: http://t.co/abcde's page"
@@ -218,8 +218,32 @@ tests:
expected: ["foo.com", "foo.net", "foo.org", "foo.edu", "foo.gov"]
- description: "Extract URLs without protocol not on (com|org|edu|gov|net) domains"
- text: "foo.bar foo.co.jp www.foo.bar www.foo.co.uk wwwww.foo foo.comm foo.somecom foo.govedu foo.jp"
- expected: ["foo.co.jp", "www.foo.co.uk"]
+ text: "foo.bar foo.co.jp www.foo.bar www.foo.co.uk wwwww.foo foo.comm foo.somecom foo.govedu foo.jp foo.something.fi"
+ expected: ["foo.co.jp", "www.foo.co.uk", "foo.jp", "foo.something.fi"]
+
+ - description: "Extract URLs without the protocol with a dash in the subdomain"
+ text: "test twitter-dash.com"
+ expected: ["twitter-dash.com"]
+
+ - description: "Extract URLs without the protocol and with CCTLD and a dash in the subdomain"
+ text: "test twitter-dash.fi"
+ expected: ["twitter-dash.fi"]
+
+ - description: "DO NOT Extract URLs without the protocol starting with a dash"
+ text: "test -dash.com"
+ expected: []
+
+ - description: "DO NOT Extract URLs without the protocol with a subdomain ending to a dash"
+ text: "test dash-.domain.com"
+ expected: []
+
+ - description: "DO NOT Extract URLs without the protocol ending with a dash and without the protocol"
+ text: "test dash-.com"
+ expected: []
+
+ - description: "DO NOT Extract URLs without the protocol having only a dash in domain"
+ text: "test -.com"
+ expected: []
- description: "Extract URLs without protocol on ccTLD with slash"
text: "t.co/abcde bit.ly/abcde"
@@ -253,9 +277,9 @@ tests:
text: "http://twitter.com/これは日本語です。example.com中国語http://t.co/abcde한국twitter.comテストexample2.comテストhttp://twitter.com/abcde"
expected: ["http://twitter.com/", "example.com", "http://t.co/abcde", "twitter.com", "example2.com", "http://twitter.com/abcde"]
- - description: "DO NOT extract short URLs without protocol on ccTLD domains without path"
+ - description: "Extract short URLs without protocol on ccTLD domains without path"
text: "twitter.jp日本語t.co中国語foo.jp t.co foo.jp"
- expected: []
+ expected: ["twitter.jp", "t.co", "foo.jp", "t.co", "foo.jp"]
- description: "Extract URLs beginning with a non-breaking space (U+00A0)"
text: "@user Try http:// example.com/path"
@@ -388,7 +412,7 @@ tests:
- description: "Extract URL before newline"
text: "http://twitter.com\nhttp://example.com\nhttp://example.com/path\nexample.com/path\nt.co\nt.co/abcde"
- expected: ["http://twitter.com", "http://example.com", "http://example.com/path", "example.com/path", "t.co/abcde"]
+ expected: ["http://twitter.com", "http://example.com", "http://example.com/path", "example.com/path", "t.co", "t.co/abcde"]
- description: "DO NOT extract URL if preceded by $"
text: "$http://twitter.com $twitter.com $http://t.co/abcde $t.co/abcde $t.co $TVI.CA $RBS.CA"

0 comments on commit 6b8e6ee

Please sign in to comment.