-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Initial implementation of raw string literals (#1304)
* test cases for raw string literals * raw string literal implementation * match as block string if starting with triple ", and better error message for simple string except for *#"""#* * fix broken test case block string literal cannot be one line * test cases for raw string literals * raw string literal implementation * match as block string if starting with triple ", and better error message for simple string except for *#"""#* * fix broken test case block string literal cannot be one line * removed unused initial value * rename flag to indicate multi-line string and remove comment * use * to get value from std::optional * clean-ups * removed skip_scan flag and directly return in case of a single line string starting with #+\'\'\' * Updated error message: simple string -> single-line string. Co-authored-by: josh11b <josh11b@users.noreply.github.com> * Updated test cases according to changes in error message * Removed counting_hashtag flag. * Implemented ScanHelper class to handle scanning * Fixed explanation of ReadHashTags. * Addressed PR comment. * Clarify that scan_helper holds the source text. * Addressed PR comments. * Updated error messages in test cases. * Added const keyword to return type of GetCurrentStr(). * addressed PR comments. 1. Moved ScanHelper class to lex_scan_helper.h and lex_scan_helper.cpp. 2. Moved ReadHashTags and Process* functions to lex_scan_helper.cpp. Moved YY_USER_ACTION, SIMPLE_TOKEN and ARG_TOKEN to lex_helper.h. Added a wrapper function YyinputWrapper to call static function yyinput in lexer.lpp. 3. Renamed ScanHelper with StringLexHelper. 4. Modified BUILD accordingly. 5. Renamed data members and functions. * Addressed PR comments. 1. Adjusted order to keep ret usage close. 2. Used resize to construct the string to avoid creation of temp string. Co-authored-by: Jon Ross-Perkins <jperkins@google.com> * Removed the multi_line flag and skip_read field to improve readability. 
* Copied default parameter value to definition of UnescapeStringLiteral. Co-authored-by: Jon Ross-Perkins <jperkins@google.com> * Copied default parameter value to definition of ParseBlockStringLiteral. Co-authored-by: Jon Ross-Perkins <jperkins@google.com> * Prefix CARBON_ to SIMPLE_TOKEN and ARG_TOKEN macros. * Rollback redefinition of arguments. * Updated comment on the flex macro. Co-authored-by: Jon Ross-Perkins <jperkins@google.com> * Updated wording. Co-authored-by: Jon Ross-Perkins <jperkins@google.com> * Moved the EOF error out of the loop. * Removed duplicated declaration. * Changed type of `hashtag_num` and `leading_quotes` to int. * Minor fix: string copy. Co-authored-by: Jon Ross-Perkins <jperkins@google.com> * Added comment on YyinputWrapper. Co-authored-by: Jon Ross-Perkins <jperkins@google.com> * Grammar in comment. Co-authored-by: Jon Ross-Perkins <jperkins@google.com> * Added check of eof before reading next char. * Minor updates based on PR comments. * Minor changes to address PR comments. * Used a clearer way to calculate `hashtag_num` and `leading_quotes`. Switched back to indicate multi-line string with a flag. * Directly copy StringRef for compilation error message. * Make str_with_quote const as we don't change it. Co-authored-by: josh11b <josh11b@users.noreply.github.com> * Added TODO for unsupported cases. * Fixed a typo. Co-authored-by: Jon Ross-Perkins <jperkins@google.com> Co-authored-by: josh11b <josh11b@users.noreply.github.com> Co-authored-by: Jon Ross-Perkins <jperkins@google.com>
- Loading branch information
1 parent
a1be2a8
commit 8d0f336
Showing
22 changed files
with
683 additions
and
236 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
// Part of the Carbon Language project, under the Apache License v2.0 with LLVM | ||
// Exceptions. See /LICENSE for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
|
||
#ifndef CARBON_EXPLORER_SYNTAX_LEX_HELPER_H_ | ||
#define CARBON_EXPLORER_SYNTAX_LEX_HELPER_H_ | ||
|
||
// Flex expands this macro immediately before each action. | ||
// | ||
// Advances the current token position by yyleng columns without changing | ||
// the line number, and takes us out of the after-whitespace / after-operand | ||
// state by switching the start condition back to INITIAL. | ||
// | ||
// Relies on a variable named `context` being in scope at the expansion site. | ||
// NOTE(review): deliberately left as a bare statement sequence rather than a | ||
// do { } while (0) wrapper; presumably Flex expands it without a trailing | ||
// semicolon -- confirm before restructuring. | ||
#define YY_USER_ACTION \ | ||
context.current_token_position.columns(yyleng); \ | ||
if (YY_START == AFTER_WHITESPACE || YY_START == AFTER_OPERAND) { \ | ||
BEGIN(INITIAL); \ | ||
} | ||
|
||
// Expands to a call to Carbon::Parser::make_<name>, passing only the current | ||
// token position. Relies on a variable named `context` being in scope. | ||
// Note that the expansion already ends in `;`, so the macro can only be used | ||
// in statement position (e.g. `return CARBON_SIMPLE_TOKEN(X);`). | ||
#define CARBON_SIMPLE_TOKEN(name) \ | ||
Carbon::Parser::make_##name(context.current_token_position); | ||
|
||
// Expands to a call to Carbon::Parser::make_<name>, passing `arg` followed by | ||
// the current token position. Relies on a variable named `context` being in | ||
// scope. Note that the expansion already ends in `;`, so the macro can only | ||
// be used in statement position. | ||
#define CARBON_ARG_TOKEN(name, arg) \ | ||
Carbon::Parser::make_##name(arg, context.current_token_position); | ||
|
||
#endif // CARBON_EXPLORER_SYNTAX_LEX_HELPER_H_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
// Part of the Carbon Language project, under the Apache License v2.0 with LLVM | ||
// Exceptions. See /LICENSE for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
|
||
#include "explorer/syntax/lex_scan_helper.h" | ||
|
||
#include "common/string_helpers.h" | ||
#include "explorer/syntax/lex_helper.h" | ||
#include "llvm/Support/FormatVariadic.h" | ||
|
||
namespace Carbon { | ||
|
||
auto StringLexHelper::Advance() -> bool { | ||
CARBON_CHECK(is_eof_ == false); | ||
const char c = YyinputWrapper(yyscanner_); | ||
if (c <= 0) { | ||
context_.RecordSyntaxError("Unexpected end of file"); | ||
is_eof_ = true; | ||
return false; | ||
} | ||
str_.push_back(c); | ||
return true; | ||
} | ||
|
||
// Consumes characters from `scan_helper` one at a time, expecting each of the
// next `hashtag_num` characters to be '#'. Stops and returns false as soon as
// the scanner hits EOF or yields a different character; returns true once all
// `hashtag_num` hashtags have been read.
auto ReadHashTags(Carbon::StringLexHelper& scan_helper,
                  const size_t hashtag_num) -> bool {
  for (size_t remaining = hashtag_num; remaining > 0; --remaining) {
    if (!scan_helper.Advance()) {
      return false;
    }
    if (scan_helper.last_char() != '#') {
      return false;
    }
  }
  return true;
}
|
||
auto ProcessSingleLineString(llvm::StringRef str, | ||
Carbon::ParseAndLexContext& context, | ||
const size_t hashtag_num) | ||
-> Carbon::Parser::symbol_type { | ||
std::string hashtags(hashtag_num, '#'); | ||
const auto str_with_quote = str; | ||
CARBON_CHECK(str.consume_front(hashtags + "\"") && | ||
str.consume_back("\"" + hashtags)); | ||
|
||
std::optional<std::string> unescaped = | ||
Carbon::UnescapeStringLiteral(str, hashtag_num); | ||
if (unescaped == std::nullopt) { | ||
return context.RecordSyntaxError( | ||
llvm::formatv("Invalid escaping in string: {0}", str_with_quote)); | ||
} | ||
return CARBON_ARG_TOKEN(string_literal, *unescaped); | ||
} | ||
|
||
auto ProcessMultiLineString(llvm::StringRef str, | ||
Carbon::ParseAndLexContext& context, | ||
const size_t hashtag_num) | ||
-> Carbon::Parser::symbol_type { | ||
std::string hashtags(hashtag_num, '#'); | ||
CARBON_CHECK(str.consume_front(hashtags) && str.consume_back(hashtags)); | ||
Carbon::ErrorOr<std::string> block_string = | ||
Carbon::ParseBlockStringLiteral(str, hashtag_num); | ||
if (!block_string.ok()) { | ||
return context.RecordSyntaxError(llvm::formatv( | ||
"Invalid block string: {0}", block_string.error().message())); | ||
} | ||
return CARBON_ARG_TOKEN(string_literal, *block_string); | ||
} | ||
|
||
} // namespace Carbon |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
// Part of the Carbon Language project, under the Apache License v2.0 with LLVM | ||
// Exceptions. See /LICENSE for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
|
||
#ifndef CARBON_EXPLORER_SYNTAX_LEX_SCAN_HELPER_H_ | ||
#define CARBON_EXPLORER_SYNTAX_LEX_SCAN_HELPER_H_ | ||
|
||
#include <string> | ||
|
||
#include "explorer/syntax/parse_and_lex_context.h" | ||
#include "explorer/syntax/parser.h" | ||
|
||
// Exposes yyinput; defined in lexer.lpp. | ||
// StringLexHelper::Advance treats any result <= 0 as end of input. | ||
extern auto YyinputWrapper(yyscan_t yyscanner) -> int; | ||
|
||
namespace Carbon { | ||
|
||
class StringLexHelper { | ||
public: | ||
StringLexHelper(const char* text, yyscan_t yyscanner, | ||
Carbon::ParseAndLexContext& context) | ||
: str_(text), yyscanner_(yyscanner), context_(context), is_eof_(false) {} | ||
// Advances yyscanner by one char. Sets is_eof to true and returns false on | ||
// EOF. | ||
auto Advance() -> bool; | ||
// Returns the last scanned char. | ||
auto last_char() -> char { return str_.back(); }; | ||
// Returns the scanned string. | ||
auto str() -> const std::string& { return str_; }; | ||
|
||
auto is_eof() -> bool { return is_eof_; }; | ||
|
||
private: | ||
std::string str_; | ||
yyscan_t yyscanner_; | ||
Carbon::ParseAndLexContext& context_; | ||
// Skips reading next char. | ||
bool is_eof_; | ||
}; | ||
|
||
// Tries to read `hashtag_num` hashtags. Returns true on success. | ||
// Reads `hashtag_num` characters on success. On failure, stops at the first | ||
// character that is not a hashtag, having read the preceding consecutive | ||
// hashtags (< `hashtag_num`) + 1 characters, or stops at EOF. | ||
auto ReadHashTags(Carbon::StringLexHelper& scan_helper, size_t hashtag_num) | ||
-> bool; | ||
|
||
// Removes the `hashtag_num` surrounding hashtags and the quotes from a single | ||
// line string, then unescapes it. Reports an error on invalid escaping. | ||
auto ProcessSingleLineString(llvm::StringRef str, | ||
Carbon::ParseAndLexContext& context, | ||
size_t hashtag_num) -> Carbon::Parser::symbol_type; | ||
// Removes the `hashtag_num` surrounding hashtags from a block string and | ||
// parses the rest as a block string literal. Reports an error if parsing | ||
// fails. | ||
auto ProcessMultiLineString(llvm::StringRef str, | ||
Carbon::ParseAndLexContext& context, | ||
size_t hashtag_num) -> Carbon::Parser::symbol_type; | ||
|
||
} // namespace Carbon | ||
|
||
#endif // CARBON_EXPLORER_SYNTAX_LEX_SCAN_HELPER_H_ |
Oops, something went wrong.