diff --git a/include/swift/AST/DiagnosticsParse.def b/include/swift/AST/DiagnosticsParse.def index 3f087fe6988f2..94a2a3f8e8dc5 100644 --- a/include/swift/AST/DiagnosticsParse.def +++ b/include/swift/AST/DiagnosticsParse.def @@ -114,6 +114,12 @@ ERROR(lex_unprintable_ascii_character,none, "unprintable ASCII character found in source file", ()) ERROR(lex_invalid_utf8,none, "invalid UTF-8 found in source file", ()) +ERROR(lex_character_empty,none, + "empty character literal", ()) +ERROR(lex_character_invalid_escape,none, + "invalid escape in character literal", ()) +ERROR(lex_character_not_codepoint,none, + "character not expressible as a single codepoint", ()) ERROR(lex_single_quote_string,none, "single-quoted string literal found, use '\"'", ()) ERROR(lex_invalid_curly_quote,none, diff --git a/include/swift/Parse/Lexer.h b/include/swift/Parse/Lexer.h index d71c0d6e8c9a2..4680033ed36dc 100644 --- a/include/swift/Parse/Lexer.h +++ b/include/swift/Parse/Lexer.h @@ -480,6 +480,8 @@ class Lexer { void operator=(const SILBodyRAII&) = delete; }; + /* Lexes a single-quoted character literal starting at CurPtr, advancing CurPtr as it goes. Returns the code point of the literal, or ~0U after a diagnostic has been emitted. Declared ahead of the private section, presumably so the parser can call it (TODO confirm access level against the full class). */ uint32_t lexCharacterLiteral(const char *&CurPtr); + private: /// Nul character meaning kind. enum class NulCharacterKind { diff --git a/lib/Parse/Lexer.cpp b/lib/Parse/Lexer.cpp index 347c0a4282b3b..94b294be5a8c3 100644 --- a/lib/Parse/Lexer.cpp +++ b/lib/Parse/Lexer.cpp @@ -2424,14 +2424,53 @@ void Lexer::lexImpl() { return lexNumber(); case '"': - case '\'': return lexStringLiteral(); + case '\'': { /* A single quote no longer falls through to lexStringLiteral: it is lexed as a character literal. */ + const char *TokStart = CurPtr-1; /* The token starts one character before CurPtr, presumably at the opening quote. */ + uint32_t CodePoint = lexCharacterLiteral(CurPtr); + /* A lexed code point is emitted as an integer_literal token; the ~0U failure sentinel becomes tok::unknown. */ return formToken(CodePoint == ~0U ? 
tok::unknown : + tok::integer_literal, TokStart); + } + case '`': return lexEscapedIdentifier(); } } +/* Lex the body of a single-quoted character literal. On entry CurPtr points just past the opening quote. On exit CurPtr has been advanced over the rest of the literal: past the closing quote if one is found, otherwise up to the next newline or NUL. Returns the code point of the literal, or ~0U after diagnosing an empty literal, an invalid escape, invalid UTF-8, or a literal that is not exactly one code point followed by the closing quote. */ uint32_t Lexer::lexCharacterLiteral(const char *&CurPtr) { + uint32_t CodePoint = ~0U; + if (*CurPtr == '\'') + diagnose(CurPtr, diag::lex_character_empty); + else if (*CurPtr == '\\') { + switch (*++CurPtr) { /* Every recognized escape steps past its escape character so that CurPtr is left on the closing quote for the single-code-point check below. */ + case 't': ++CurPtr; CodePoint = '\t'; break; + case 'r': ++CurPtr; CodePoint = '\r'; break; + case 'n': ++CurPtr; CodePoint = '\n'; break; + case '\\': ++CurPtr; CodePoint = '\\'; break; + case '\'': ++CurPtr; CodePoint = '\''; break; + default: + diagnose(CurPtr, diag::lex_character_invalid_escape); + } + } + else { + CodePoint = swift::validateUTF8CharacterAndAdvance(CurPtr, BufferEnd); + + if (CodePoint == ~0U) + diagnose(CurPtr, diag::lex_invalid_utf8); + } + + if (CodePoint != ~0U && *CurPtr != '\'') { /* The code point must be followed directly by the closing quote. */ + diagnose(CurPtr, diag::lex_character_not_codepoint); + CodePoint = ~0U; + } + + while (*CurPtr && *CurPtr != '\n' && *CurPtr++ != '\''); /* Skip to the end of the literal, consuming the closing quote if present; stop at a newline or NUL. */ + return CodePoint; +} + + + Token Lexer::getTokenAtLocation(const SourceManager &SM, SourceLoc Loc) { // Don't try to do anything with an invalid location. 
if (!Loc.isValid()) diff --git a/lib/Parse/ParseExpr.cpp b/lib/Parse/ParseExpr.cpp index c5bbd6a8b9269..911bcf260bae5 100644 --- a/lib/Parse/ParseExpr.cpp +++ b/lib/Parse/ParseExpr.cpp @@ -1441,6 +1441,13 @@ ParserResult Parser::parseExprPrimary(Diag<> ID, bool isExprBasic) { switch (Tok.getKind()) { case tok::integer_literal: { StringRef Text = copyAndStripUnderscores(Context, Tok.getText()); + /* An integer_literal token whose text starts with a single quote is a character literal: re-lex it from the original token text and replace Text with the decimal spelling of its code point. */ if (Text[0] == '\'') { + const char *CurPtr = Tok.getText().begin() + 1; /* Skip the opening quote. */ + uint32_t CodePoint = L->lexCharacterLiteral(CurPtr); + std::string Integer = std::to_string(CodePoint); + /* Copy the digits plus the terminating NUL into storage obtained from Context so that Text does not point into the local std::string. */ char *IntegerBuff = (char *)Context.Allocate(Integer.size() + 1, 1 ); + Text = StringRef(strcpy(IntegerBuff, Integer.c_str()), Integer.size()); + } SourceLoc Loc = consumeToken(tok::integer_literal); ExprContext.setCreateSyntax(SyntaxKind::IntegerLiteralExpr); return makeParserResult(new (Context) diff --git a/stdlib/public/core/Character.swift b/stdlib/public/core/Character.swift index 51a1d51613494..236c5d2e86ea3 100644 --- a/stdlib/public/core/Character.swift +++ b/stdlib/public/core/Character.swift @@ -497,3 +497,9 @@ extension Character: Hashable { hasher.combine(String(self)) } } + +extension Character : ExpressibleByIntegerLiteral { + /* Creates a Character from the Unicode scalar whose numeric value is the given integer literal. NOTE(review): the _value: initializer looks like it skips scalar validation, and the UInt32 conversion presumably rejects negative values at run time; confirm both. */ public init(integerLiteral value: Int) { + self.init(Unicode.Scalar(_value: UInt32(value))) + } +} diff --git a/stdlib/public/core/UnicodeScalar.swift b/stdlib/public/core/UnicodeScalar.swift index 7f2bd02304932..dc3c5d43355ea 100644 --- a/stdlib/public/core/UnicodeScalar.swift +++ b/stdlib/public/core/UnicodeScalar.swift @@ -44,6 +44,12 @@ extension Unicode { } } +extension Unicode.Scalar : ExpressibleByIntegerLiteral { + /* Creates a Unicode.Scalar whose numeric value is the given integer literal, converted to UInt32. NOTE(review): the _value: initializer looks like it skips scalar validation; confirm. */ public init(integerLiteral value: Int) { + self.init(_value: UInt32(value)) + } +} + extension Unicode.Scalar : _ExpressibleByBuiltinUnicodeScalarLiteral, ExpressibleByUnicodeScalarLiteral {