From 9f686ebda68fdf021418aa1310f70c84e100baf2 Mon Sep 17 00:00:00 2001 From: honeybadgerdontcare Date: Tue, 29 Nov 2016 23:09:26 +0100 Subject: [PATCH] Validator rollup (#6391) * Update the set of characters which the javascript HTML parser allows in HTML tags after the first character in the tag. * Update the set of characters which the javascript HTML parser allows in HTML tags after the first character in the tag. * Avoid mutating the argument that's passed to validateSaxEvents. * Fix a bug in the javascript HTML parser where we don't change the section when manufacturing a closing HEAD tag. * Allow `amp-accordion` to have `disable-session-states` attribute. --- .../0.1/validator-amp-accordion.protoascii | 1 + validator/engine/htmlparser.js | 13 ++++++++++--- validator/engine/validator.js | 4 +--- validator/validator-main.protoascii | 2 +- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/extensions/amp-accordion/0.1/validator-amp-accordion.protoascii b/extensions/amp-accordion/0.1/validator-amp-accordion.protoascii index af225d3fe0f0..ec9c5b2b1fac 100644 --- a/extensions/amp-accordion/0.1/validator-amp-accordion.protoascii +++ b/extensions/amp-accordion/0.1/validator-amp-accordion.protoascii @@ -54,6 +54,7 @@ tags: { # tag_name: "AMP-ACCORDION" also_requires_tag: "amp-accordion extension .js script" attrs: { name: "animate" value: "" } + attrs: { name: "disable-session-states" value: "" } child_tags: { child_tag_name_oneof: "SECTION" } diff --git a/validator/engine/htmlparser.js b/validator/engine/htmlparser.js index 0571ecc98daf..9d07f668dc9e 100644 --- a/validator/engine/htmlparser.js +++ b/validator/engine/htmlparser.js @@ -234,6 +234,8 @@ class TagNameStack { if (this.handler_.markManufacturedBody) this.handler_.markManufacturedBody(); this.startTag('BODY', []); + } else { + this.region_ = TagRegion.IN_BODY; } } break; @@ -836,8 +838,9 @@ amp.htmlparser.HtmlParser.HEX_ESCAPE_RE_ = /^#x([0-9A-Fa-f]+)$/; * @private */ amp.htmlparser.HtmlParser.INSIDE_TAG_TOKEN_ = new RegExp( - // Don't capture space. - '^\\s*(?:' + + // Don't capture space. In this case, we don't use \s because it includes a + // nonbreaking space which gets included as an attribute in our validation. + '^[ \\t\\n\\f\\r\\v]*(?:' + // Capture an attribute name in group 1, and value in group 3. // We capture the fact that there was an attribute in group 2, since // interpreters are inconsistent in whether a group that matches nothing @@ -889,7 +892,11 @@ amp.htmlparser.HtmlParser.OUTSIDE_TAG_TOKEN_ = new RegExp( // Comments not captured. '|<[!]--[\\s\\S]*?(?:-->|$)' + // '/' captured in group 2 for close tags, and name captured in group 3. - '|<(/)?([a-z!\\?][a-z0-9_:-]*)' + + // The first character of a tag (after possibly '/') can be A-Z, a-z, + // '!' or '?'. The remaining characters are more easily expressed as a + // negative set of: '\0', ' ', '\n', '\r', '\t', '\f', '\v', '>', or + // '/'. + '|<(/)?([a-z!\\?][^\\0 \\n\\r\\t\\f\\v>/]*)' + // Text captured in group 4. '|([^<&>]+)' + // Cruft captured in group 5. diff --git a/validator/engine/validator.js b/validator/engine/validator.js index b3c802270bd4..a2bd4cf258a5 100644 --- a/validator/engine/validator.js +++ b/validator/engine/validator.js @@ -4890,9 +4890,7 @@ amp.validator.validateSaxEvents = function(saxEvents, htmlFormat) { for (const e of saxEvents) { switch (e[0]) { case 'startTag': - e.shift(); - const tagName = e.shift(); - handler.startTag(tagName, e); + handler.startTag(/*tagName=*/e[1], e.slice(2)); break; case 'endTag': handler.endTag(e[1]); diff --git a/validator/validator-main.protoascii b/validator/validator-main.protoascii index 0ce5ca59378a..42e1a55657d7 100644 --- a/validator/validator-main.protoascii +++ b/validator/validator-main.protoascii @@ -25,7 +25,7 @@ min_validator_revision_required: 189 # newer versions of the spec file. This is currently a Google internal # mechanism, validator.js does not use this facility. However, any # change to this file (validator-main.js) requires updating this revision id. -spec_file_revision: 315 +spec_file_revision: 316 # Validator extensions. # =====================