Skip to content

Commit

Permalink
Support unescaped Unicode line and paragraph separators in string literals.
Browse files Browse the repository at this point in the history

https://github.com/tc39/proposal-json-superset

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=232583800
  • Loading branch information
johnplaisted authored and tjgq committed Feb 7, 2019
1 parent 7b53da9 commit a6aeaab
Show file tree
Hide file tree
Showing 8 changed files with 135 additions and 11 deletions.
19 changes: 15 additions & 4 deletions src/com/google/javascript/jscomp/CodeGenerator.java
Expand Up @@ -1806,10 +1806,6 @@ private String strEscape(
case '$': sb.append(dollarEscape); break;
case '`': sb.append(backtickEscape); break;

// From LineTerminators (ES5 Section 7.3, Table 3)
case '\u2028': sb.append("\\u2028"); break;
case '\u2029': sb.append("\\u2029"); break;

case '=':
// '=' is a syntactically significant regexp character.
if (trustedStrings || isRegexp) {
Expand Down Expand Up @@ -1868,7 +1864,22 @@ private String strEscape(
sb.append(c);
}
break;

default:
if (isRegexp) {
// In 2019 these characters (line and paragraph separators) are valid in strings but
// not regular expressions.
// https://github.com/tc39/proposal-json-superset
if (c == '\u2028') {
sb.append("\\u2028");
break;
}
if (c == '\u2029') {
sb.append("\\u2029");
break;
}
}

if ((outputCharsetEncoder != null && outputCharsetEncoder.canEncode(c))
|| (c > 0x1f && c < 0x7f)) {
// If we're given an outputCharsetEncoder, then check if the character can be
Expand Down
Expand Up @@ -52,8 +52,18 @@ public final class MarkUntranspilableFeaturesAsRemoved extends AbstractPostOrder
Feature.REGEXP_NAMED_GROUPS,
Feature.REGEXP_UNICODE_PROPERTY_ESCAPE);

// ES2019-era features that this pass handles instead of a dedicated transpilation pass.
private static final FeatureSet UNTRANSPILABLE_2019_FEATURES =
FeatureSet.BARE_MINIMUM.with(
// Unescaped U+2028 / U+2029 in string literals could be transpiled, but there is no point:
// the intent is that the code printer escapes them in the output, so a separate pass to
// escape them is unnecessary. This pass is reused only to mark the feature as transpiled.
// Note that, unlike the other features handled here, this one is never reported as
// "won't be transpiled" below.
// NOTE(review): the default branch of CodeGenerator.strEscape appears to emit a character
// unescaped when outputCharsetEncoder can encode it - confirm that "always escaped" still
// holds for non-regexp strings under such output charsets.
Feature.UNESCAPED_UNICODE_LINE_OR_PARAGRAPH_SEP);

private static final FeatureSet ALL_UNTRANSPILABLE_FEATURES =
FeatureSet.BARE_MINIMUM.union(UNTRANSPILABLE_2018_FEATURES);
FeatureSet.BARE_MINIMUM
.union(UNTRANSPILABLE_2018_FEATURES)
.union(UNTRANSPILABLE_2019_FEATURES);

private static final FeatureSet ALL_TRANSPILABLE_FEATURES;

Expand Down
Expand Up @@ -166,6 +166,10 @@ public enum Feature {
// https://github.com/tc39/proposal-regexp-unicode-property-escapes
REGEXP_UNICODE_PROPERTY_ESCAPE("RegExp unicode property escape", LangVersion.ES2018),

// ES 2019 adds https://github.com/tc39/proposal-json-superset
UNESCAPED_UNICODE_LINE_OR_PARAGRAPH_SEP(
"Unescaped unicode line or paragraph separator", LangVersion.ES_NEXT),

// ES 2019 adds optional catch bindings:
// https://github.com/tc39/proposal-optional-catch-binding
OPTIONAL_CATCH_BINDING("Optional catch binding", LangVersion.ES_UNSUPPORTED),
Expand Down
6 changes: 6 additions & 0 deletions src/com/google/javascript/jscomp/parsing/parser/Parser.java
Expand Up @@ -2327,6 +2327,12 @@ private IdentifierExpressionTree parseIdentifierExpression() {
/**
 * Consumes the next literal token and wraps it in a {@link LiteralExpressionTree}.
 *
 * <p>When the token is a string literal that contains an unescaped line or paragraph separator,
 * the use of {@code Feature.UNESCAPED_UNICODE_LINE_OR_PARAGRAPH_SEP} is recorded first.
 */
private LiteralExpressionTree parseLiteralExpression() {
  SourcePosition start = getTreeStartLocation();
  Token literal = nextLiteralToken();

  if (literal.type == TokenType.STRING) {
    // String-typed tokens are expected to be StringLiteralToken instances.
    StringLiteralToken stringToken = (StringLiteralToken) literal;
    if (stringToken.hasUnescapedUnicodeLineOrParagraphSeparator()) {
      recordFeatureUsed(Feature.UNESCAPED_UNICODE_LINE_OR_PARAGRAPH_SEP);
    }
  }

  return new LiteralExpressionTree(getTreeLocation(start), literal);
}

Expand Down
33 changes: 27 additions & 6 deletions src/com/google/javascript/jscomp/parsing/parser/Scanner.java
Expand Up @@ -16,6 +16,7 @@

package com.google.javascript.jscomp.parsing.parser;


import com.google.errorprone.annotations.FormatMethod;
import com.google.errorprone.annotations.FormatString;
import com.google.javascript.jscomp.parsing.parser.trees.Comment;
Expand Down Expand Up @@ -313,6 +314,18 @@ private static boolean isLineTerminator(char ch) {
}
}

/**
 * Reports whether {@code ch} counts as a line terminator inside a string literal.
 *
 * <p>The line separator (U+2028) and paragraph separator (U+2029) are allowed in string
 * literals, so they are excluded here; every other character defers to
 * {@link #isLineTerminator(char)}.
 *
 * <p>See https://github.com/tc39/proposal-json-superset
 */
private static boolean isStringLineTerminator(char ch) {
  boolean isLineOrParagraphSeparator = ch == '\u2028' || ch == '\u2029';
  if (isLineOrParagraphSeparator) {
    return false;
  }
  return isLineTerminator(ch);
}

// 7.4 Comments
private void skipComments() {
while (skipComment())
Expand Down Expand Up @@ -827,19 +840,27 @@ private static boolean isIdentifierPart(char ch) {
}

/**
 * Scans the body of a quoted string literal and produces its token.
 *
 * <p>While scanning, each character seen at the top of the loop is checked for an unescaped
 * line separator (U+2028) or paragraph separator (U+2029); the result is stored on the returned
 * {@link StringLiteralToken}.
 *
 * @param beginIndex index where the token starts, used for the token text, range and errors
 * @param terminator the quote character that closes the literal
 * @return a string literal token; one is returned even when the literal is malformed
 */
private Token scanStringLiteral(int beginIndex, char terminator) {
  boolean hasUnescapedUnicodeLineOrParagraphSeparator = false;
  while (peekStringLiteralChar(terminator)) {
    char c = peekChar();
    if (c == '\u2028' || c == '\u2029') {
      hasUnescapedUnicodeLineOrParagraphSeparator = true;
    }
    if (!skipStringLiteralChar()) {
      // Skipping the character failed; stop here and return what was scanned so far.
      return new StringLiteralToken(
          getTokenString(beginIndex),
          getTokenRange(beginIndex),
          hasUnescapedUnicodeLineOrParagraphSeparator);
    }
  }
  if (peekChar() != terminator) {
    reportError(getPosition(beginIndex), "Unterminated string literal");
  } else {
    // Consume the closing quote.
    nextChar();
  }
  return new StringLiteralToken(
      getTokenString(beginIndex),
      getTokenRange(beginIndex),
      hasUnescapedUnicodeLineOrParagraphSeparator);
}

private Token scanTemplateLiteral(int beginIndex) {
Expand Down Expand Up @@ -893,7 +914,7 @@ private String getTokenString(int beginIndex) {
}

/**
 * Returns true when the next character belongs to the string literal body: input is not
 * exhausted, the character is not {@code terminator}, and it is not a line terminator as
 * defined for string literals (U+2028 and U+2029 are permitted).
 */
private boolean peekStringLiteralChar(char terminator) {
  return !isAtEnd() && peekChar() != terminator && !isStringLineTerminator(peekChar());
}

private boolean skipStringLiteralChar() {
Expand Down Expand Up @@ -996,7 +1017,7 @@ private boolean skipStringLiteralEscapeSequence() {
reportError("Unterminated string literal escape sequence");
return false;
}
if (isLineTerminator(peekChar())) {
if (isStringLineTerminator(peekChar())) {
skipLineTerminator();
return true;
}
Expand Down
@@ -0,0 +1,34 @@
/*
* Copyright 2019 The Closure Compiler Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.google.javascript.jscomp.parsing.parser;

import com.google.javascript.jscomp.parsing.parser.util.SourceRange;

/**
 * A single or double quoted JavaScript string literal.
 *
 * <p>Besides the usual literal token data, it remembers whether the literal's source text
 * contained an unescaped line separator (U+2028) or paragraph separator (U+2029).
 */
public class StringLiteralToken extends LiteralToken {
  /** True when the scanner saw an unescaped U+2028 or U+2029 inside the literal. */
  private final boolean unescapedSeparatorSeen;

  /**
   * @param value the token text
   * @param location the source range covered by the token
   * @param hasUnescapedUnicodeLineOrParagraphSeparator whether the literal contains an unescaped
   *     U+2028 or U+2029
   */
  public StringLiteralToken(
      String value, SourceRange location, boolean hasUnescapedUnicodeLineOrParagraphSeparator) {
    super(TokenType.STRING, value, location);
    unescapedSeparatorSeen = hasUnescapedUnicodeLineOrParagraphSeparator;
  }

  /** Returns whether this literal contains an unescaped U+2028 or U+2029. */
  public boolean hasUnescapedUnicodeLineOrParagraphSeparator() {
    return unescapedSeparatorSeen;
  }
}
20 changes: 20 additions & 0 deletions test/com/google/javascript/jscomp/CodePrinterTest.java
Expand Up @@ -38,6 +38,26 @@
public final class CodePrinterTest extends CodePrinterTestBase {
private static final Joiner LINE_JOINER = Joiner.on('\n');

@Test
public void testUnescapedUnicodeLineSeparator() {
  // A template literal holding U+2028 is expected to print unchanged.
  languageMode = LanguageMode.ECMASCRIPT_2015;
  assertPrintSame("`\u2028`");

  // Either quote style is expected to print as a double-quoted, escaped string.
  languageMode = LanguageMode.ECMASCRIPT_NEXT;
  final String escapedOutput = "\"\\u2028\"";
  assertPrint("'\u2028'", escapedOutput);
  assertPrint("\"\u2028\"", escapedOutput);
}

@Test
public void testUnescapedUnicodeParagraphSeparator() {
  // A template literal holding U+2029 is expected to print unchanged.
  languageMode = LanguageMode.ECMASCRIPT_2015;
  assertPrintSame("`\u2029`");

  // Either quote style is expected to print as a double-quoted, escaped string.
  languageMode = LanguageMode.ECMASCRIPT_NEXT;
  final String escapedOutput = "\"\\u2029\"";
  assertPrint("'\u2029'", escapedOutput);
  assertPrint("\"\u2029\"", escapedOutput);
}

@Test
public void testOptionalCatchBlock() {
useUnsupportedFeatures = true;
Expand Down
18 changes: 18 additions & 0 deletions test/com/google/javascript/jscomp/parsing/ParserTest.java
Expand Up @@ -94,6 +94,24 @@ public void setUp() throws Exception {
expectedFeatures = FeatureSet.BARE_MINIMUM;
}

@Test
public void testParseUnescapedLineSep() {
  // Parsed before any feature expectation is set: a template literal containing U+2028.
  parse("`\u2028`;");

  // Quoted string literals containing a raw U+2028 should record the feature.
  expectFeatures(Feature.UNESCAPED_UNICODE_LINE_OR_PARAGRAPH_SEP);
  for (String js : new String[] {"\"\u2028\";", "'\u2028';"}) {
    parse(js);
  }
}

@Test
public void testParseUnescapedParagraphSep() {
  // Parsed before any feature expectation is set: a template literal containing U+2029.
  parse("`\u2029`;");

  // Quoted string literals containing a raw U+2029 should record the feature.
  expectFeatures(Feature.UNESCAPED_UNICODE_LINE_OR_PARAGRAPH_SEP);
  for (String js : new String[] {"\"\u2029\";", "'\u2029';"}) {
    parse(js);
  }
}

@Test
public void testOptionalCatchBinding() {
mode = LanguageMode.UNSUPPORTED;
Expand Down

0 comments on commit a6aeaab

Please sign in to comment.