Skip to content

Commit

Permalink
Merge pull request #34414 from maustinstar/sr-10011
Browse files Browse the repository at this point in the history
[SR-10011] [Lexer] Raw Strings escape character sequence resembling multiline delimiter
  • Loading branch information
rintaro committed Oct 27, 2020
2 parents 290923c + 75eed47 commit 9269c5c
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 19 deletions.
57 changes: 40 additions & 17 deletions lib/Parse/Lexer.cpp
Expand Up @@ -1241,19 +1241,6 @@ static bool diagnoseZeroWidthMatchAndAdvance(char Target, const char *&CurPtr,
return *CurPtr == Target && CurPtr++;
}

/// advanceIfMultilineDelimiter - Centralized check for multiline delimiter.
///
/// Succeeds when the character just before \p CurPtr is '"' and the next two
/// characters are each accepted by diagnoseZeroWidthMatchAndAdvance as '"'.
/// On success \p CurPtr is moved past those two characters and true is
/// returned; on failure \p CurPtr is left unchanged and false is returned.
/// \p Diags is forwarded to diagnoseZeroWidthMatchAndAdvance as-is.
static bool advanceIfMultilineDelimiter(const char *&CurPtr,
                                        DiagnosticEngine *Diags) {
  // The quote that opened the literal must sit immediately before CurPtr.
  if (CurPtr[-1] != '"')
    return false;

  // Probe on a copy so that CurPtr only moves once both quotes have matched.
  const char *Probe = CurPtr;
  for (unsigned QuotesLeft = 2; QuotesLeft != 0; --QuotesLeft) {
    if (!diagnoseZeroWidthMatchAndAdvance('"', Probe, Diags))
      return false;
  }

  CurPtr = Probe;
  return true;
}

/// advanceIfCustomDelimiter - Extracts/detects any custom delimiter on
/// opening a string literal, advances CurPtr if a delimiter is found and
/// returns a non-zero delimiter length. CurPtr[-1] must be '#' when called.
Expand Down Expand Up @@ -1300,6 +1287,37 @@ static bool delimiterMatches(unsigned CustomDelimiterLen, const char *&BytesPtr,
return true;
}

/// advanceIfMultilineDelimiter - Centralized check for multiline delimiter.
///
/// Returns true, and moves \p CurPtr past the two remaining quotes, when
/// \p CurPtr[-1] is '"' and the next two characters are each accepted by
/// diagnoseZeroWidthMatchAndAdvance as '"'. Otherwise returns false and
/// leaves \p CurPtr unchanged.
///
/// \param CustomDelimiterLen length of the custom delimiter in use; when zero
///        the single-line lookahead below is skipped.
/// \param CurPtr position just after the first quote; updated only on success.
/// \param Diags forwarded to diagnoseZeroWidthMatchAndAdvance for the quote
///        checks. The lookahead itself always passes nullptr to
///        delimiterMatches.
/// \param IsOpening when true and a custom delimiter is in use, the rest of
///        the current line is scanned first: if any '"' on it is followed by
///        a matching delimiter, the literal closes on this line and is
///        therefore not a multiline literal.
static bool advanceIfMultilineDelimiter(unsigned CustomDelimiterLen,
                                        const char *&CurPtr,
                                        DiagnosticEngine *Diags,
                                        bool IsOpening = false) {

  // Test for single-line string literals that resemble multiline delimiter.
  // Skip the scan entirely when CurPtr already sits on a NUL, so CurPtr + 1
  // is never dereferenced beyond it.
  if (IsOpening && CustomDelimiterLen && *CurPtr != '\0') {
    const char *ScanPtr = CurPtr + 1;
    // Stop on NUL as well as on a line break: without this the scan has no
    // bound when the literal is on the last line and no newline follows.
    // Assumes the lexed buffer is NUL-terminated -- TODO confirm.
    // NOTE(review): a NUL embedded in the middle of a line also ends the
    // lookahead early; telling it apart from the terminator would require
    // the buffer end, which this helper does not receive.
    while (*ScanPtr != '\r' && *ScanPtr != '\n' && *ScanPtr != '\0') {
      if (*ScanPtr == '"') {
        // Step past the quote and test for the closing delimiter there.
        if (delimiterMatches(CustomDelimiterLen, ++ScanPtr, nullptr)) {
          return false;
        }
        // Already stepped past the quote; re-test the loop condition.
        continue;
      }
      ++ScanPtr;
    }
  }

  // Probe on a copy so that CurPtr only moves once both quotes have matched.
  const char *TmpPtr = CurPtr;
  if (*(TmpPtr - 1) == '"' &&
      diagnoseZeroWidthMatchAndAdvance('"', TmpPtr, Diags) &&
      diagnoseZeroWidthMatchAndAdvance('"', TmpPtr, Diags)) {
    CurPtr = TmpPtr;
    return true;
  }

  return false;
}

/// lexCharacter - Read a character and return its UTF32 code. If this is the
/// end of enclosing string/character sequence (i.e. the character is equal to
/// 'StopQuote'), this returns ~0U and advances 'CurPtr' pointing to the end of
Expand Down Expand Up @@ -1342,7 +1360,8 @@ unsigned Lexer::lexCharacter(const char *&CurPtr, char StopQuote,

DiagnosticEngine *D = EmitDiagnostics ? Diags : nullptr;
auto TmpPtr = CurPtr;
if (IsMultilineString && !advanceIfMultilineDelimiter(TmpPtr, D))
if (IsMultilineString &&
!advanceIfMultilineDelimiter(CustomDelimiterLen, TmpPtr, D))
return '"';
if (CustomDelimiterLen &&
!delimiterMatches(CustomDelimiterLen, TmpPtr, D, /*IsClosing=*/true))
Expand Down Expand Up @@ -1478,7 +1497,9 @@ static const char *skipToEndOfInterpolatedExpression(const char *CurPtr,
if (!inStringLiteral()) {
// Open string literal.
OpenDelimiters.push_back(CurPtr[-1]);
AllowNewline.push_back(advanceIfMultilineDelimiter(CurPtr, nullptr));
AllowNewline.push_back(advanceIfMultilineDelimiter(CustomDelimiterLen,
CurPtr, nullptr,
true));
CustomDelimiter.push_back(CustomDelimiterLen);
continue;
}
Expand All @@ -1490,7 +1511,8 @@ static const char *skipToEndOfInterpolatedExpression(const char *CurPtr,
continue;

// Multi-line string can only be closed by '"""'.
if (AllowNewline.back() && !advanceIfMultilineDelimiter(CurPtr, nullptr))
if (AllowNewline.back() &&
!advanceIfMultilineDelimiter(CustomDelimiterLen, CurPtr, nullptr))
continue;

// Check whether we have equivalent number of '#'s.
Expand Down Expand Up @@ -1827,7 +1849,8 @@ void Lexer::lexStringLiteral(unsigned CustomDelimiterLen) {
// diagnostics about changing them to double quotes.
assert((QuoteChar == '"' || QuoteChar == '\'') && "Unexpected start");

bool IsMultilineString = advanceIfMultilineDelimiter(CurPtr, Diags);
bool IsMultilineString = advanceIfMultilineDelimiter(CustomDelimiterLen,
CurPtr, Diags, true);
if (IsMultilineString && *CurPtr != '\n' && *CurPtr != '\r')
diagnose(CurPtr, diag::lex_illegal_multiline_string_start)
.fixItInsert(Lexer::getSourceLoc(CurPtr), "\n");
Expand Down
43 changes: 43 additions & 0 deletions test/Parse/raw_string.swift
Expand Up @@ -68,6 +68,49 @@ _ = ##"""
"""##
// CHECK: "a raw string with \"\"\" in it"

// ===---------- False Multiline Delimiters --------===

/// Source code contains zero-width character in this format: `#"[U+200B]"[U+200B]"#`
/// The check contains zero-width character in this format: `"[U+200B]\"[U+200B]"`
/// If this check fails after you implement `diagnoseZeroWidthMatchAndAdvance`,
/// then you may need to tweak how to test for single-line string literals that
/// resemble a multiline delimiter in `advanceIfMultilineDelimiter` so that it
/// passes again.
/// See https://bugs.swift.org/browse/SR-8678
_ = #"​"​"#
// CHECK: "​\"​"

_ = #""""#
// CHECK: "\"\""

_ = #"""""#
// CHECK: "\"\"\""

_ = #""""""#
// CHECK: "\"\"\"\""

_ = #"""#
// CHECK: "\""

_ = ##""" foo # "# "##
// CHECK: "\"\" foo # \"# "

_ = ###""" "# "## "###
// CHECK: "\"\" \"# \"## "

_ = ###"""##"###
// CHECK: "\"\"##"

_ = "interpolating \(#"""false delimiter"#)"
// CHECK: "interpolating "
// CHECK: "\"\"false delimiter"

_ = """
interpolating \(#"""false delimiters"""#)
"""
// CHECK: "interpolating "
// CHECK: "\"\"false delimiters\"\""

let foo = "Interpolation"
_ = #"\b\b \#(foo)\#(foo) Kappa"#
// CHECK: "\\b\\b "
Expand Down
17 changes: 15 additions & 2 deletions test/Parse/raw_string_errors.swift
Expand Up @@ -9,6 +9,11 @@ let _ = #"\##("invalid")"#
// expected-error@-1{{too many '#' characters in delimited escape}}
// expected-error@-2{{invalid escape sequence in literal}}

let _ = ###"""invalid"######
// expected-error@-1{{too many '#' characters in closing delimiter}}{{26-29=}}
// expected-error@-2{{consecutive statements on a line must be separated by ';'}}
// expected-error@-3{{expected expression}}

let _ = ####"invalid"###
// expected-error@-1{{unterminated string literal}}
Expand All @@ -17,8 +22,16 @@ let _ = ###"invalid"######
// expected-error@-2{{consecutive statements on a line must be separated by ';'}}
// expected-error@-3{{expected expression}}

let _ = ##"""##
let _ = ##"""aa
foobar
##"""##
aa"""##
// expected-error@-3{{multi-line string literal content must begin on a new line}}{{14-14=\n}}
// expected-error@-2{{multi-line string literal closing delimiter must begin on a new line}}{{5-5=\n}}

let _ = #""" foo "bar" #baz
"""#
// expected-error@-2{{multi-line string literal content must begin on a new line}}{{13-13=\n}}

let _ = ###""" "# "##
"""###
// expected-error@-2{{multi-line string literal content must begin on a new line}}{{15-15=\n}}

0 comments on commit 9269c5c

Please sign in to comment.