From a6aeaab3ed2cd5f4a28344dedb3d346b8f99cbc1 Mon Sep 17 00:00:00 2001 From: johnplaisted Date: Tue, 5 Feb 2019 16:42:47 -0800 Subject: [PATCH] Support unescaped unicode line and paragraph separators in string literals. https://github.com/tc39/proposal-json-superset ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=232583800 --- .../javascript/jscomp/CodeGenerator.java | 19 ++++++++--- .../MarkUntranspilableFeaturesAsRemoved.java | 12 ++++++- .../jscomp/parsing/parser/FeatureSet.java | 4 +++ .../jscomp/parsing/parser/Parser.java | 6 ++++ .../jscomp/parsing/parser/Scanner.java | 33 ++++++++++++++---- .../parsing/parser/StringLiteralToken.java | 34 +++++++++++++++++++ .../javascript/jscomp/CodePrinterTest.java | 20 +++++++++++ .../javascript/jscomp/parsing/ParserTest.java | 18 ++++++++++ 8 files changed, 135 insertions(+), 11 deletions(-) create mode 100644 src/com/google/javascript/jscomp/parsing/parser/StringLiteralToken.java diff --git a/src/com/google/javascript/jscomp/CodeGenerator.java b/src/com/google/javascript/jscomp/CodeGenerator.java index be8c72b5f77..6423d5ec431 100644 --- a/src/com/google/javascript/jscomp/CodeGenerator.java +++ b/src/com/google/javascript/jscomp/CodeGenerator.java @@ -1806,10 +1806,6 @@ private String strEscape( case '$': sb.append(dollarEscape); break; case '`': sb.append(backtickEscape); break; - // From LineTerminators (ES5 Section 7.3, Table 3) - case '\u2028': sb.append("\\u2028"); break; - case '\u2029': sb.append("\\u2029"); break; - case '=': // '=' is a syntactically significant regexp character. if (trustedStrings || isRegexp) { @@ -1868,7 +1864,22 @@ private String strEscape( sb.append(c); } break; + default: + if (isRegexp) { + // In 2019 these characters (line and paragraph separators) are valid in strings but + // not regular expressions. + // https://github.com/tc39/proposal-json-superset + if (c == '\u2028') { + sb.append("\\u2028"); + break; + } + if (c == '\u2029') { + sb.append("\\u2029"); + break; + } + } + if ((outputCharsetEncoder != null && outputCharsetEncoder.canEncode(c)) || (c > 0x1f && c < 0x7f)) { // If we're given an outputCharsetEncoder, then check if the character can be diff --git a/src/com/google/javascript/jscomp/MarkUntranspilableFeaturesAsRemoved.java b/src/com/google/javascript/jscomp/MarkUntranspilableFeaturesAsRemoved.java index 1bef188fefe..27df57bed23 100644 --- a/src/com/google/javascript/jscomp/MarkUntranspilableFeaturesAsRemoved.java +++ b/src/com/google/javascript/jscomp/MarkUntranspilableFeaturesAsRemoved.java @@ -52,8 +52,18 @@ public final class MarkUntranspilableFeaturesAsRemoved extends AbstractPostOrder Feature.REGEXP_NAMED_GROUPS, Feature.REGEXP_UNICODE_PROPERTY_ESCAPE); + private static final FeatureSet UNTRANSPILABLE_2019_FEATURES = + FeatureSet.BARE_MINIMUM.with( + // We could transpile this, but there's no point. We always escape these in the output, + // no need to have a separate pass to escape them. So we'll piggy back off this pass to + // mark it as transpiled. Note that we never complain that this feature won't be + // transpiled below. + Feature.UNESCAPED_UNICODE_LINE_OR_PARAGRAPH_SEP); + private static final FeatureSet ALL_UNTRANSPILABLE_FEATURES = - FeatureSet.BARE_MINIMUM.union(UNTRANSPILABLE_2018_FEATURES); + FeatureSet.BARE_MINIMUM + .union(UNTRANSPILABLE_2018_FEATURES) + .union(UNTRANSPILABLE_2019_FEATURES); private static final FeatureSet ALL_TRANSPILABLE_FEATURES; diff --git a/src/com/google/javascript/jscomp/parsing/parser/FeatureSet.java b/src/com/google/javascript/jscomp/parsing/parser/FeatureSet.java index 19f33897724..24d0cc4c242 100644 --- a/src/com/google/javascript/jscomp/parsing/parser/FeatureSet.java +++ b/src/com/google/javascript/jscomp/parsing/parser/FeatureSet.java @@ -166,6 +166,10 @@ public enum Feature { // https://github.com/tc39/proposal-regexp-unicode-property-escapes REGEXP_UNICODE_PROPERTY_ESCAPE("RegExp unicode property escape", LangVersion.ES2018), + // ES 2019 adds https://github.com/tc39/proposal-json-superset + UNESCAPED_UNICODE_LINE_OR_PARAGRAPH_SEP( + "Unescaped unicode line or paragraph separator", LangVersion.ES_NEXT), + // ES 2019 adds optional catch bindings: // https://github.com/tc39/proposal-optional-catch-binding OPTIONAL_CATCH_BINDING("Optional catch binding", LangVersion.ES_UNSUPPORTED), diff --git a/src/com/google/javascript/jscomp/parsing/parser/Parser.java b/src/com/google/javascript/jscomp/parsing/parser/Parser.java index 16037b5421c..9017ac51af3 100644 --- a/src/com/google/javascript/jscomp/parsing/parser/Parser.java +++ b/src/com/google/javascript/jscomp/parsing/parser/Parser.java @@ -2327,6 +2327,12 @@ private IdentifierExpressionTree parseIdentifierExpression() { private LiteralExpressionTree parseLiteralExpression() { SourcePosition start = getTreeStartLocation(); Token literal = nextLiteralToken(); + + if (literal.type == TokenType.STRING + && ((StringLiteralToken) literal).hasUnescapedUnicodeLineOrParagraphSeparator()) { + recordFeatureUsed(Feature.UNESCAPED_UNICODE_LINE_OR_PARAGRAPH_SEP); + } + return new LiteralExpressionTree(getTreeLocation(start), literal); } diff --git a/src/com/google/javascript/jscomp/parsing/parser/Scanner.java b/src/com/google/javascript/jscomp/parsing/parser/Scanner.java index 4332f390da8..68f23c9f75c 100644 --- a/src/com/google/javascript/jscomp/parsing/parser/Scanner.java +++ b/src/com/google/javascript/jscomp/parsing/parser/Scanner.java @@ -16,6 +16,7 @@ package com.google.javascript.jscomp.parsing.parser; + import com.google.errorprone.annotations.FormatMethod; import com.google.errorprone.annotations.FormatString; import com.google.javascript.jscomp.parsing.parser.trees.Comment; @@ -313,6 +314,18 @@ private static boolean isLineTerminator(char ch) { } } + // Allow line separator and paragraph separator in string literals. + // https://github.com/tc39/proposal-json-superset + private static boolean isStringLineTerminator(char ch) { + switch (ch) { + case '\u2028': // Line Separator + case '\u2029': // Paragraph Separator + return false; + default: + return isLineTerminator(ch); + } + } + // 7.4 Comments private void skipComments() { while (skipComment()) @@ -827,10 +840,16 @@ private static boolean isIdentifierPart(char ch) { } private Token scanStringLiteral(int beginIndex, char terminator) { + boolean hasUnescapedUnicodeLineOrParagraphSeparator = false; while (peekStringLiteralChar(terminator)) { + char c = peekChar(); + hasUnescapedUnicodeLineOrParagraphSeparator = + hasUnescapedUnicodeLineOrParagraphSeparator || c == '\u2028' || c == '\u2029'; if (!skipStringLiteralChar()) { - return new LiteralToken( - TokenType.STRING, getTokenString(beginIndex), getTokenRange(beginIndex)); + return new StringLiteralToken( + getTokenString(beginIndex), + getTokenRange(beginIndex), + hasUnescapedUnicodeLineOrParagraphSeparator); } } if (peekChar() != terminator) { @@ -838,8 +857,10 @@ private Token scanStringLiteral(int beginIndex, char terminator) { } else { nextChar(); } - return new LiteralToken( - TokenType.STRING, getTokenString(beginIndex), getTokenRange(beginIndex)); + return new StringLiteralToken( + getTokenString(beginIndex), + getTokenRange(beginIndex), + hasUnescapedUnicodeLineOrParagraphSeparator); } private Token scanTemplateLiteral(int beginIndex) { @@ -893,7 +914,7 @@ private String getTokenString(int beginIndex) { } private boolean peekStringLiteralChar(char terminator) { - return !isAtEnd() && peekChar() != terminator && !isLineTerminator(peekChar()); + return !isAtEnd() && peekChar() != terminator && !isStringLineTerminator(peekChar()); } private boolean skipStringLiteralChar() { @@ -996,7 +1017,7 @@ private boolean skipStringLiteralEscapeSequence() { reportError("Unterminated string literal escape sequence"); return false; } - if (isLineTerminator(peekChar())) { + if (isStringLineTerminator(peekChar())) { skipLineTerminator(); return true; } diff --git a/src/com/google/javascript/jscomp/parsing/parser/StringLiteralToken.java b/src/com/google/javascript/jscomp/parsing/parser/StringLiteralToken.java new file mode 100644 index 00000000000..4c15efebcb6 --- /dev/null +++ b/src/com/google/javascript/jscomp/parsing/parser/StringLiteralToken.java @@ -0,0 +1,34 @@ +/* + * Copyright 2019 The Closure Compiler Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.javascript.jscomp.parsing.parser; + +import com.google.javascript.jscomp.parsing.parser.util.SourceRange; + +/** A single or double quoted JavaScript string literal. */ +public class StringLiteralToken extends LiteralToken { + private final boolean hasUnescapedUnicodeLineOrParagraphSeparator; + + public StringLiteralToken( + String value, SourceRange location, boolean hasUnescapedUnicodeLineOrParagraphSeparator) { + super(TokenType.STRING, value, location); + this.hasUnescapedUnicodeLineOrParagraphSeparator = hasUnescapedUnicodeLineOrParagraphSeparator; + } + + public boolean hasUnescapedUnicodeLineOrParagraphSeparator() { + return hasUnescapedUnicodeLineOrParagraphSeparator; + } +} diff --git a/test/com/google/javascript/jscomp/CodePrinterTest.java b/test/com/google/javascript/jscomp/CodePrinterTest.java index c04e89ddb66..be611847abb 100644 --- a/test/com/google/javascript/jscomp/CodePrinterTest.java +++ b/test/com/google/javascript/jscomp/CodePrinterTest.java @@ -38,6 +38,26 @@ public final class CodePrinterTest extends CodePrinterTestBase { private static final Joiner LINE_JOINER = Joiner.on('\n'); + @Test + public void testUnescapedUnicodeLineSeparator() { + languageMode = LanguageMode.ECMASCRIPT_2015; + assertPrintSame("`\u2028`"); + + languageMode = LanguageMode.ECMASCRIPT_NEXT; + assertPrint("'\u2028'", "\"\\u2028\""); + assertPrint("\"\u2028\"", "\"\\u2028\""); + } + + @Test + public void testUnescapedUnicodeParagraphSeparator() { + languageMode = LanguageMode.ECMASCRIPT_2015; + assertPrintSame("`\u2029`"); + + languageMode = LanguageMode.ECMASCRIPT_NEXT; + assertPrint("'\u2029'", "\"\\u2029\""); + assertPrint("\"\u2029\"", "\"\\u2029\""); + } + @Test public void testOptionalCatchBlock() { useUnsupportedFeatures = true; diff --git a/test/com/google/javascript/jscomp/parsing/ParserTest.java b/test/com/google/javascript/jscomp/parsing/ParserTest.java index ec0552b314f..64fa7e99a1f 100644 --- a/test/com/google/javascript/jscomp/parsing/ParserTest.java +++ b/test/com/google/javascript/jscomp/parsing/ParserTest.java @@ -94,6 +94,24 @@ public void setUp() throws Exception { expectedFeatures = FeatureSet.BARE_MINIMUM; } + @Test + public void testParseUnescapedLineSep() { + parse("`\u2028`;"); + + expectFeatures(Feature.UNESCAPED_UNICODE_LINE_OR_PARAGRAPH_SEP); + parse("\"\u2028\";"); + parse("'\u2028';"); + } + + @Test + public void testParseUnescapedParagraphSep() { + parse("`\u2029`;"); + + expectFeatures(Feature.UNESCAPED_UNICODE_LINE_OR_PARAGRAPH_SEP); + parse("\"\u2029\";"); + parse("'\u2029';"); + } + @Test public void testOptionalCatchBinding() { mode = LanguageMode.UNSUPPORTED;