From a4fb73b6d90bed5701e3a3672b6ee4a9da78d60a Mon Sep 17 00:00:00 2001
From: Jordan Milne
Date: Wed, 2 Apr 2014 04:47:30 -0300
Subject: [PATCH] some cleanup to html stuff

---
 Chrome/manifest.json                 |   1 +
 OperaBlink/manifest.json             |   1 +
 RES.safariextension/Info.plist       |   1 +
 XPI/lib/main.js                      |   1 +
 lib/HTMLPasteurizer.js               | 223 +++++++++++++++++++++++++++
 lib/reddit_enhancement_suite.user.js |   2 +-
 lib/utils.js                         |  33 +---
 7 files changed, 231 insertions(+), 31 deletions(-)
 create mode 100644 lib/HTMLPasteurizer.js

diff --git a/Chrome/manifest.json b/Chrome/manifest.json
index 1d8828fb70..eacffd4611 100644
--- a/Chrome/manifest.json
+++ b/Chrome/manifest.json
@@ -25,6 +25,7 @@
 				"jquery-fieldselection.min.js",
 				"tinycon.js",
 				"jquery.tokeninput.js",
+				"HTMLPasteurizer.js",
 				"snuownd.js",
 				"utils.js",
 				"browsersupport.js",
diff --git a/OperaBlink/manifest.json b/OperaBlink/manifest.json
index 89376eb593..a3a1f65c8d 100644
--- a/OperaBlink/manifest.json
+++ b/OperaBlink/manifest.json
@@ -25,6 +25,7 @@
 				"jquery-fieldselection.min.js",
 				"tinycon.js",
 				"jquery.tokeninput.js",
+				"HTMLPasteurizer.js",
 				"snuownd.js",
 				"utils.js",
 				"browsersupport.js",
diff --git a/RES.safariextension/Info.plist b/RES.safariextension/Info.plist
index 8467c08102..4ee1f278d6 100644
--- a/RES.safariextension/Info.plist
+++ b/RES.safariextension/Info.plist
@@ -35,6 +35,7 @@
 				<string>jquery-fieldselection.min.js</string>
 				<string>tinycon.js</string>
 				<string>jquery.tokeninput.js</string>
+				<string>HTMLPasteurizer.js</string>
 				<string>snuownd.js</string>
 				<string>hogan-2.0.0.js</string>
 				<string>utils.js</string>
diff --git a/XPI/lib/main.js b/XPI/lib/main.js
index d5219b8f22..beb3014d6a 100644
--- a/XPI/lib/main.js
+++ b/XPI/lib/main.js
@@ -170,6 +170,7 @@ pageMod.PageMod({
 		self.data.url('jquery-fieldselection.min.js'),
 		self.data.url('tinycon.js'),
 		self.data.url('jquery.tokeninput.js'),
+		self.data.url('HTMLPasteurizer.js'),
 		self.data.url('snuownd.js'),
 		self.data.url('utils.js'),
 		self.data.url('browsersupport.js'),
diff --git a/lib/HTMLPasteurizer.js b/lib/HTMLPasteurizer.js
new file mode 100644
index 0000000000..7f97ba5f75
--- /dev/null
+++ b/lib/HTMLPasteurizer.js
@@ -0,0 +1,223 @@
+/*
+ * HTMLPasteurizer
+ * Copyright 2014 Jordan Milne
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+(function(window, $) {
+	"use strict";
+
+	var Pasteurizer = {};
+	window.Pasteurizer = Pasteurizer;
+
+	// Some older browsers allow whitespace in protocols, but ignore
+	// it during processing. Strip any weirdness out.
+	var SCHEME_FILTER = /(:(?!$)|[^:a-z0-9\.\-\+])/ig;
+
+	Pasteurizer.DEFAULT_CONFIG = {
+		elemWhitelist: [
+			'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'span', 'div', 'code',
+			'br', 'hr', 'p', 'a', 'img', 'pre', 'blockquote', 'table',
+			'thead', 'tbody', 'tfoot', 'tr', 'th', 'td', 'strong', 'em',
+			'i', 'b', 'u', 'ul', 'ol', 'li', 'dl', 'dt', 'dd',
+			'font', 'center', 'small', 's', 'q', 'sub', 'sup', 'del'
+		],
+		// global attribute whitelist
+		attrWhitelist: [
+			'title', 'colspan', 'rowspan', 'cellspacing', 'cellpadding',
+			'scope', 'face', 'color', 'size', 'bgcolor', 'align'
+		],
+		// tag-specific attribute whitelists
+		tagAttrWhitelist: {
+			'img': ['src', 'alt'],
+			'a': ['href']
+		},
+		// Which schemes may be linked to
+		schemeWhitelist: [
+			"http:", "https:", "ftp:", "mailto:",
+			"git:", "steam:", "irc:", "news:", "mumble:",
+			"ssh:", "ircs:", "ts3server:", ":"
+		],
+		// Whether or not to hoist the contents of removed nodes up the tree.
+		hoistOrphanedContents: true,
+
+		// Tags that should *not* have their contents hoisted
+		hoistBlacklist: ["script", "style"]
+	};
+
+	/**
+	 * Scrub a DOM node and its descendants in place according to config.
+	 *
+	 * Elements missing from config.elemWhitelist, anchors whose scheme is not
+	 * allowed, and node types other than 1-5, 9 and 11 are removed. Their
+	 * children are hoisted into their place when config.hoistOrphanedContents
+	 * is set and the node name is not in config.hoistBlacklist. Attributes in
+	 * neither attribute whitelist are stripped from the elements that are kept.
+	 *
+	 * @param {Node} node The node to scrub; it may end up removed from its parent.
+	 * @param {Object} config A complete config shaped like DEFAULT_CONFIG.
+	 */
+	Pasteurizer.scrubNode = function(node, config) {
+		var jNode = $(node);
+		var nodeName = node.nodeName.toLowerCase();
+		var nodeType = node.nodeType;
+
+		var validNode = false;
+
+		if(nodeType === 1) {
+			validNode = config.elemWhitelist.indexOf(nodeName) !== -1;
+		} else if(nodeType < 6 || nodeType === 9 || nodeType === 11) {
+			validNode = true;
+		}
+
+		if(validNode && node.nodeType === 1) {
+			// Kill anchor tags with invalid hrefs.
+			if(nodeName === "a") {
+				if(node.protocol !== undefined) {
+					var scrubbedProto = node.protocol.replace(SCHEME_FILTER, "");
+
+					// Only allow a non-whitelisted scheme if the document was served
+					// via that same scheme.
+					if(config.schemeWhitelist.indexOf(scrubbedProto) === -1 &&
+						scrubbedProto !== document.location.protocol) {
+						validNode = false;
+					}
+				} else {
+					// TODO: Handle UAs that don't support a.protocol?
+					// we may need to bundle URL.js.
+				}
+			}
+		}
+
+		if(validNode && node.nodeType === 1) {
+			// Let's not invalidate any iterators, collect all attribute names.
+			var attrs = $.map(node.attributes, function(attr){
+				return attr.nodeName;
+			});
+
+			// Remove unwanted attributes
+			attrs.forEach(function(attrName) {
+
+				// Is this attr allowed on any node?
+				if(config.attrWhitelist.indexOf(attrName) !== -1) {
+					return;
+				}
+
+				// is this attr allowed on *this* node?
+				if(nodeName in config.tagAttrWhitelist &&
+					config.tagAttrWhitelist[nodeName].indexOf(attrName) !== -1) {
+					return;
+				}
+
+				// jQuery.removeAttr chokes on attribute names containing quotes
+				node.removeAttribute(attrName);
+			});
+		}
+
+		var canHoist = (config.hoistOrphanedContents &&
+			config.hoistBlacklist.indexOf(nodeName) === -1);
+
+		// Cut out early if we don't need the contents
+		if(!validNode && !canHoist) {
+			jNode.remove();
+			return;
+		}
+
+		jNode.contents().each(function(i, child) {
+			Pasteurizer.scrubNode(child, config);
+		});
+
+		if(!validNode) {
+			// remove the node and put its remaining contents in its place.
+			jNode.contents().detach().insertAfter(jNode);
+			jNode.remove();
+		}
+	};
+
+	/**
+	 * Parse an untrusted HTML string and scrub the resulting nodes.
+	 *
+	 * @param {string} html The markup to parse.
+	 * @param {Object} [config] Overrides for DEFAULT_CONFIG; any key left out
+	 *     falls back to its default value.
+	 * @returns {jQuery} The scrubbed child nodes of the parsed document's body.
+	 */
+	Pasteurizer.safeParseHTML = function(html, config) {
+		// Shallow-merge over the defaults so a partial config can't leave
+		// scrubNode without one of the lists it indexes into.
+		config = $.extend({}, Pasteurizer.DEFAULT_CONFIG, config);
+
+		// DOMParser behaves similarly to jQuery.parseHTML, but it won't make any
+		// requests at parse time.
+		var parser = new DOMParser();
+
+		//TODO: handle
+		var parsed = parser.parseFromString(html, "text/html");
+
+		// DOMParser wraps HTML fragments in body tags
+		var body = $(parsed).find('body').first();
+
+		body.contents().each(function(i, node) {
+			Pasteurizer.scrubNode(node, config);
+		});
+		return body.contents();
+	};
+
+}(window, jQuery));
+
+
+
+/*
+ * DOMParser HTML extension
+ * 2012-09-04
+ *
+ * By Eli Grey, http://eligrey.com
+ * Public domain.
+ * NO WARRANTY EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK.
+ */
+
+/*! @source https://gist.github.com/1129031 */
+/*global document, DOMParser*/
+
+(function(DOMParser) {
+	"use strict";
+
+	var DOMParser_proto = DOMParser.prototype;
+	var real_parseFromString = DOMParser_proto.parseFromString;
+
+	// Firefox/Opera/IE throw errors on unsupported types
+	try {
+		// WebKit returns null on unsupported types
+		if ((new DOMParser).parseFromString("", "text/html")) {
+			// text/html parsing is natively supported
+			return;
+		}
+	} catch (ex) {}
+
+	DOMParser_proto.parseFromString = function(markup, type) {
+		if (/^\s*text\/html\s*(?:;|$)/i.test(type)) {
+			var doc = document.implementation.createHTMLDocument("");
+			// Markup containing a doctype is written to the root element, anything else to the body.
+			if (markup.toLowerCase().indexOf('<!doctype') > -1) {
+				doc.documentElement.innerHTML = markup;
+			}
+			else {
+				doc.body.innerHTML = markup;
+			}
+			return doc;
+		} else {
+			return real_parseFromString.apply(this, arguments);
+		}
+	};
+}(DOMParser));
\ No newline at end of file
diff --git a/lib/reddit_enhancement_suite.user.js b/lib/reddit_enhancement_suite.user.js
index f341a602b5..8c0d89147a 100755
--- a/lib/reddit_enhancement_suite.user.js
+++ b/lib/reddit_enhancement_suite.user.js
@@ -1,6 +1,6 @@
 var RESVersion = "4.3.2";
 
-var jQuery, $, guiders, Tinycon, SnuOwnd;
+var jQuery, $, guiders, Tinycon, SnuOwnd, Pasteurizer;
 
 /*
 	Reddit Enhancement Suite - a suite of tools to enhance Reddit
diff --git a/lib/utils.js b/lib/utils.js
index a1b1777e9c..f6a2ed3914 100644
--- a/lib/utils.js
+++ b/lib/utils.js
@@ -499,36 +499,9 @@ RESUtils.stripHTML = function(str) {
 	return str;
 };
 RESUtils.sanitizeHTML = function(htmlStr) {
-	if (!this.sanitizer) {
-		var SnuOwnd = window.SnuOwnd;
-		var redditCallbacks = SnuOwnd.getRedditCallbacks();
-		var callbacks = SnuOwnd.createCustomCallbacks({
-			paragraph: function(out, text, options) {
-				if (text) out.s += text.s;
-			},
-			autolink: redditCallbacks.autolink,
-			raw_html_tag: redditCallbacks.raw_html_tag
-		});
-		var rendererConfig = SnuOwnd.defaultRenderState();
-		rendererConfig.flags = SnuOwnd.DEFAULT_WIKI_FLAGS;
-		rendererConfig.html_element_whitelist = [
-			'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'span', 'div', 'code',
-			'br', 'hr', 'p', 'a', 'img', 'pre', 'blockquote', 'table',
-			'thead', 'tbody', 'tfoot', 'tr', 'th', 'td', 'strong', 'em',
-			'i', 'b', 'u', 'ul', 'ol', 'li', 'dl', 'dt', 'dd',
-			'font', 'center', 'small', 's', 'q', 'sub', 'sup', 'del'
-		];
-		rendererConfig.html_attr_whitelist = [
-			'href', 'title', 'src', 'alt', 'colspan',
-			'rowspan', 'cellspacing', 'cellpadding', 'scope',
-			'face', 'color', 'size', 'bgcolor', 'align'
-		];
-		this.sanitizer = SnuOwnd.getParser({
-			callbacks: callbacks,
-			context: rendererConfig
-		});
-	}
-	return this.sanitizer.render(htmlStr);
+	var scrubbed = Pasteurizer.safeParseHTML(htmlStr);
+	// .html() on an empty jQuery set is not a string; callers expect one.
+	return scrubbed.length ? scrubbed.wrapAll('<div></div>').parent().html() : '';
 };
 RESUtils.firstValid = function() {
 	for (var i = 0, len = arguments.length; i < len; i++) {