Skip to content

Commit

Permalink
Revert "Revert "Merge pull request #40595 from hamishknight/straw-bal…
Browse files Browse the repository at this point in the history
…es""
  • Loading branch information
milseman committed Dec 19, 2021
1 parent 3ec0413 commit 7bff9da
Show file tree
Hide file tree
Showing 34 changed files with 299 additions and 181 deletions.
9 changes: 7 additions & 2 deletions cmake/modules/AddSwift.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -634,8 +634,13 @@ function(add_libswift_module module)
""
"DEPENDS"
${ARGN})
set(sources ${ALSM_UNPARSED_ARGUMENTS})
list(TRANSFORM sources PREPEND "${CMAKE_CURRENT_SOURCE_DIR}/")
set(raw_sources ${ALSM_UNPARSED_ARGUMENTS})
set(sources)
foreach(raw_source ${raw_sources})
get_filename_component(
raw_source "${raw_source}" REALPATH BASE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
list(APPEND sources "${raw_source}")
endforeach()

set(target_name "LibSwift${module}")

Expand Down
2 changes: 1 addition & 1 deletion include/swift/AST/ASTContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -621,7 +621,7 @@ class ASTContext final {
KnownProtocolKind builtinProtocol,
llvm::function_ref<DeclName (ASTContext &ctx)> initName) const;

/// Retrieve _StringProcessing.Regex.init(_regexString: String).
/// Retrieve _StringProcessing.Regex.init(_regexString: String, version: Int).
ConcreteDeclRef getRegexInitDecl(Type regexType) const;

/// Retrieve the declaration of Swift.<(Int, Int) -> Bool.
Expand Down
11 changes: 8 additions & 3 deletions include/swift/AST/Expr.h
Original file line number Diff line number Diff line change
Expand Up @@ -966,18 +966,23 @@ class InterpolatedStringLiteralExpr : public LiteralExpr {
class RegexLiteralExpr : public LiteralExpr {
SourceLoc Loc;
StringRef RegexText;
unsigned Version;

RegexLiteralExpr(SourceLoc loc, StringRef regexText, bool isImplicit)
RegexLiteralExpr(SourceLoc loc, StringRef regexText, unsigned version,
bool isImplicit)
: LiteralExpr(ExprKind::RegexLiteral, isImplicit), Loc(loc),
RegexText(regexText) {}
RegexText(regexText), Version(version) {}

public:
static RegexLiteralExpr *createParsed(ASTContext &ctx, SourceLoc loc,
StringRef regexText);
StringRef regexText, unsigned version);

/// Retrieve the raw regex text.
StringRef getRegexText() const { return RegexText; }

/// Retrieve the version of the regex string.
unsigned getVersion() const { return Version; }

SourceRange getSourceRange() const { return Loc; }

static bool classof(const Expr *E) {
Expand Down
1 change: 1 addition & 0 deletions include/swift/AST/KnownIdentifiers.def
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ IDENTIFIER(zero)
// String processing
IDENTIFIER(Regex)
IDENTIFIER_(regexString)
IDENTIFIER(version)
IDENTIFIER_(StringProcessing)

// Distributed actors
Expand Down
42 changes: 34 additions & 8 deletions include/swift/Parse/ExperimentalRegexBridging.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,43 @@
extern "C" {
#endif

typedef const char *(* ParseRegexStrawperson)(const char *);

void Parser_registerParseRegexStrawperson(ParseRegexStrawperson fn);
bool Parser_hasParseRegexStrawperson();
/// Attempt to lex a regex literal string. Takes the following arguments:
///
/// - CurPtrPtr: A pointer to the lexer's current pointer, which should be the
/// start of the literal. This will be advanced to the point at
/// which the lexer should resume, or will remain the same if this
/// is not a regex literal.
/// - BufferEnd: A pointer to the end of the buffer, which should not be lexed
/// past.
/// - ErrorOut: If an error is encountered, this will be set to the error
/// string.
///
/// Returns: `true` if lexing was completely erroneous and cannot be recovered
/// from; `false` if there was either no error or a recoverable
/// error.
typedef bool(* RegexLiteralLexingFn)(/*CurPtrPtr*/ const char **,
/*BufferEnd*/ const char *,
/*ErrorOut*/ const char **);
void Parser_registerRegexLiteralLexingFn(RegexLiteralLexingFn fn);

/// Parse a regex literal string. Takes the following arguments:
///
/// - InputPtr: A null-terminated C string of the regex literal.
/// - ErrorOut: A buffer accepting an error string upon error.
/// - VersionOut: A buffer accepting a regex literal format version.
/// - CaptureStructureOut: A buffer accepting a byte sequence representing the
/// capture structure of the literal.
/// - CaptureStructureSize: The size of the capture structure buffer. Must be
/// greater than or equal to `strlen(InputPtr)`.
typedef void(* RegexLiteralParsingFn)(/*InputPtr*/ const char *,
/*ErrorOut*/ const char **,
/*VersionOut*/ unsigned *,
/*CaptureStructureOut*/ char *,
/*CaptureStructureSize*/ unsigned);
void Parser_registerRegexLiteralParsingFn(RegexLiteralParsingFn fn);

#ifdef __cplusplus
} // extern "C"
#endif

#endif // EXPERIMENTAL_REGEX_BRIDGING


//const char* experimental_regex_strawperson(const char *in);

7 changes: 3 additions & 4 deletions include/swift/Parse/Lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -595,7 +595,9 @@ class Lexer {
void lexStringLiteral(unsigned CustomDelimiterLen = 0);
void lexEscapedIdentifier();

void lexRegexLiteral(const char *TokStart);
/// Attempt to lex a regex literal, returning true if a regex literal was
/// lexed, false if this is not a regex literal.
bool tryLexRegexLiteral(const char *TokStart);

void tryLexEditorPlaceholder();
const char *findEndOfCurlyQuoteStringLiteral(const char *,
Expand All @@ -612,9 +614,6 @@ class Lexer {

/// Emit diagnostics for single-quote string and suggest replacement
/// with double-quoted equivalent.
///
/// Or, if we're in strawperson mode, we will emit a custom
/// error message instead, determined by the Swift library.
void diagnoseSingleQuoteStringLiteral(const char *TokStart,
const char *TokEnd);

Expand Down
2 changes: 1 addition & 1 deletion lib/AST/ASTContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1221,7 +1221,7 @@ ConcreteDeclRef ASTContext::getRegexInitDecl(Type regexType) const {
auto *spModule = getLoadedModule(Id_StringProcessing);
DeclName name(*const_cast<ASTContext *>(this),
DeclBaseName::createConstructor(),
{Id_regexString});
{Id_regexString, Id_version});
SmallVector<ValueDecl *, 1> results;
spModule->lookupQualified(getRegexType(), DeclNameRef(name),
NL_IncludeUsableFromInline, results);
Expand Down
5 changes: 3 additions & 2 deletions lib/AST/Expr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2246,8 +2246,9 @@ SourceLoc TapExpr::getEndLoc() const {

RegexLiteralExpr *
RegexLiteralExpr::createParsed(ASTContext &ctx, SourceLoc loc,
StringRef regexText) {
return new (ctx) RegexLiteralExpr(loc, regexText, /*implicit*/ false);
StringRef regexText, unsigned version) {
return new (ctx) RegexLiteralExpr(loc, regexText, version,
/*implicit*/ false);
}

void swift::simple_display(llvm::raw_ostream &out, const ClosureExpr *CE) {
Expand Down
75 changes: 46 additions & 29 deletions lib/Parse/Lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,13 @@

#include <limits>

// Regex lexing delivered via libSwift.
#include "swift/Parse/ExperimentalRegexBridging.h"
/// Callback used to lex regex literals. It stays null until one is
/// registered; while it is null, `Lexer::tryLexRegexLiteral` reports that the
/// token is not a regex literal.
static RegexLiteralLexingFn regexLiteralLexingFn{nullptr};

/// Installs \p fn as the regex literal lexing callback, replacing whatever was
/// registered before.
void Parser_registerRegexLiteralLexingFn(RegexLiteralLexingFn fn) {
  regexLiteralLexingFn = fn;
}

using namespace swift;
using namespace swift::syntax;

Expand Down Expand Up @@ -1951,36 +1958,46 @@ const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body,
}
}

void Lexer::lexRegexLiteral(const char *TokStart) {
bool Lexer::tryLexRegexLiteral(const char *TokStart) {
assert(*TokStart == '\'');

bool HadError = false;
while (true) {
// Check if we reached the end of the literal without terminating.
if (CurPtr >= BufferEnd || *CurPtr == '\n' || *CurPtr == '\r') {
diagnose(TokStart, diag::lex_unterminated_regex);
return formToken(tok::unknown, TokStart);
}
// We need to have experimental string processing enabled, and have the
// parsing logic for regex literals available.
if (!LangOpts.EnableExperimentalStringProcessing || !regexLiteralLexingFn)
return false;

const auto *CharStart = CurPtr;
uint32_t CharValue = validateUTF8CharacterAndAdvance(CurPtr, BufferEnd);
if (CharValue == ~0U) {
diagnose(CharStart, diag::lex_invalid_utf8);
HadError = true;
continue;
}
if (CharValue == '\\' && (*CurPtr == '\'' || *CurPtr == '\\')) {
// Skip escaped delimiter or \.
CurPtr++;
} else if (CharValue == '\'') {
// End of literal, stop.
break;
}
// Ask libswift to try and lex a regex literal.
// - Ptr will not be advanced if this is not for a regex literal.
// - ErrStr will be set if there is any error to emit.
// - CompletelyErroneous will be set if there was an error that cannot be
// recovered from.
auto *Ptr = TokStart;
const char *ErrStr = nullptr;
bool CompletelyErroneous = regexLiteralLexingFn(&Ptr, BufferEnd, &ErrStr);
if (ErrStr)
diagnose(TokStart, diag::regex_literal_parsing_error, ErrStr);

// If we didn't make any lexing progress, this isn't a regex literal and we
// should fall back to lexing as something else.
if (Ptr == TokStart)
return false;

// Update to point to where we ended regex lexing.
assert(Ptr > TokStart && Ptr <= BufferEnd);
CurPtr = Ptr;

// If the lexing was completely erroneous, form an unknown token.
if (CompletelyErroneous) {
assert(ErrStr);
formToken(tok::unknown, TokStart);
return true;
}
if (HadError)
return formToken(tok::unknown, TokStart);

// Otherwise, we either had a successful lex, or something that was
// recoverable.
assert(ErrStr || CurPtr[-1] == '\'');
formToken(tok::regex_literal, TokStart);
return true;
}

/// lexEscapedIdentifier:
Expand Down Expand Up @@ -2528,11 +2545,11 @@ void Lexer::lexImpl() {

case '\'':
// If we have experimental string processing enabled, and have the parsing
// logic for regex literals, lex a single quoted string as a regex literal.
if (LangOpts.EnableExperimentalStringProcessing &&
Parser_hasParseRegexStrawperson()) {
return lexRegexLiteral(TokStart);
}
// logic for regex literals, try to lex a single quoted string as a regex
// literal.
if (tryLexRegexLiteral(TokStart))
return;

// Otherwise lex as a string literal and emit a diagnostic.
LLVM_FALLTHROUGH;
case '"':
Expand Down
26 changes: 12 additions & 14 deletions lib/Parse/ParseRegex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,38 +22,36 @@

// Regex parser delivered via libSwift
#include "swift/Parse/ExperimentalRegexBridging.h"
static ParseRegexStrawperson parseRegexStrawperson = nullptr;
void Parser_registerParseRegexStrawperson(ParseRegexStrawperson fn) {
parseRegexStrawperson = fn;
}
// Exposes the presence of the regex parsing function to the lexer.
bool Parser_hasParseRegexStrawperson() {
return parseRegexStrawperson != nullptr;
/// Callback used to parse the contents of a regex literal. It stays null
/// until one is registered; `Parser::parseExprRegexLiteral` asserts that it
/// has been set before calling it.
static RegexLiteralParsingFn regexLiteralParsingFn{nullptr};

/// Installs \p fn as the regex literal parsing callback, replacing whatever
/// was registered before.
void Parser_registerRegexLiteralParsingFn(RegexLiteralParsingFn fn) {
  regexLiteralParsingFn = fn;
}

using namespace swift;
using namespace swift::syntax;

ParserResult<Expr> Parser::parseExprRegexLiteral() {
assert(Tok.is(tok::regex_literal));
assert(parseRegexStrawperson);
assert(regexLiteralParsingFn);

SyntaxParsingContext LocalContext(SyntaxContext,
SyntaxKind::RegexLiteralExpr);
// Strip off delimiters.
auto rawText = Tok.getText();
assert(rawText.front() == '\'' && rawText.back() == '\'');
auto regexText = rawText.slice(1, rawText.size() - 1);

auto regexText = Tok.getText();

// Let the Swift library parse the contents; on failure it reports an error
// string through `errorStr`, which otherwise stays null.
// TODO: We need to be able to pass back a source location to emit the error
// at.
auto *errorStr = parseRegexStrawperson(regexText.str().c_str());
// `errorStr` stays null unless the callback reports a parse failure.
const char *errorStr = nullptr;
// Zero-initialize so the resulting RegexLiteralExpr never stores an
// indeterminate version if the callback returns without writing through its
// version out-parameter.
unsigned version = 0;
// NOTE(review): no capture-structure buffer is supplied here (null pointer,
// size 0), while the bridging header documents the size as needing to be
// >= strlen(InputPtr) -- confirm the callback tolerates a null buffer.
regexLiteralParsingFn(regexText.str().c_str(), &errorStr, &version,
                      /*captureStructureOut*/ nullptr,
                      /*captureStructureSize*/ 0);
if (errorStr)
diagnose(Tok, diag::regex_literal_parsing_error, errorStr);

auto loc = consumeToken();
return makeParserResult(
RegexLiteralExpr::createParsed(Context, loc, regexText));
RegexLiteralExpr::createParsed(Context, loc, regexText, version));
}
27 changes: 24 additions & 3 deletions lib/SILGen/SILGenApply.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1910,10 +1910,31 @@ buildBuiltinLiteralArgs(SILGenFunction &SGF, SGFContext C,
RValue string = SGF.emitApplyAllocatingInitializer(
expr, strInitDecl, std::move(strLiteralArgs),
/*overriddenSelfType*/ Type(), SGFContext());
PreparedArguments args(
ArrayRef<AnyFunctionType::Param>({
AnyFunctionType::Param(ctx.getStringType())}));

// The version of the regex string.
// %3 = integer_literal $Builtin.IntLiteral <version>
auto versionIntLiteral =
ManagedValue::forUnmanaged(SGF.B.createIntegerLiteral(
expr, SILType::getBuiltinIntegerLiteralType(SGF.getASTContext()),
expr->getVersion()));

using Param = AnyFunctionType::Param;
auto builtinIntTy = versionIntLiteral.getType().getASTType();
PreparedArguments versionIntBuiltinArgs(ArrayRef<Param>{Param(builtinIntTy)});
versionIntBuiltinArgs.add(
expr, RValue(SGF, {versionIntLiteral}, builtinIntTy));

// %4 = function_ref Int.init(_builtinIntegerLiteral: Builtin.IntLiteral)
// %5 = apply %4(%3, ...) -> $Int
auto intLiteralInit = ctx.getIntBuiltinInitDecl(ctx.getIntDecl());
RValue versionInt = SGF.emitApplyAllocatingInitializer(
expr, intLiteralInit, std::move(versionIntBuiltinArgs),
/*overriddenSelfType*/ Type(), SGFContext());

PreparedArguments args(ArrayRef<Param>{Param(ctx.getStringType()),
Param(ctx.getIntType())});
args.add(expr, std::move(string));
args.add(expr, std::move(versionInt));
return args;
}

Expand Down
7 changes: 2 additions & 5 deletions lib/Sema/CSGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1266,18 +1266,15 @@ namespace {
ctx.Id_Regex.str());
return Type();
}
auto substringType = ctx.getSubstringType();
auto dynCapturesType = ctx.getDynamicCapturesType();
if (!dynCapturesType) {
ctx.Diags.diagnose(E->getLoc(),
diag::string_processing_lib_missing,
"DynamicCaptures");
return Type();
}
// TODO: Replace `(Substring, DynamicCaptures)` with type inferred from
// the regex.
auto matchType = TupleType::get({substringType, dynCapturesType}, ctx);
return BoundGenericStructType::get(regexDecl, Type(), {matchType});
// TODO: Replace `DynamicCaptures` with type inferred from the regex.
return BoundGenericStructType::get(regexDecl, Type(), {dynCapturesType});
}

Type visitDeclRefExpr(DeclRefExpr *E) {
Expand Down
4 changes: 3 additions & 1 deletion libswift/Sources/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
# See http://swift.org/LICENSE.txt for license information
# See http://swift.org/CONTRIBUTORS.txt for Swift project authors

add_subdirectory(ExperimentalRegex)
if(SWIFT_ENABLE_EXPERIMENTAL_STRING_PROCESSING)
add_subdirectory(ExperimentalRegex)
endif()
add_subdirectory(SIL)
add_subdirectory(Optimizer)
13 changes: 11 additions & 2 deletions libswift/Sources/ExperimentalRegex/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,16 @@
# See http://swift.org/LICENSE.txt for license information
# See http://swift.org/CONTRIBUTORS.txt for Swift project authors

file(GLOB_RECURSE _LIBSWIFT_EXPERIMENTAL_REGEX_SOURCES
"${EXPERIMENTAL_STRING_PROCESSING_SOURCE_DIR}/Sources/_MatchingEngine/*.swift")
set(LIBSWIFT_EXPERIMENTAL_REGEX_SOURCES)
foreach(source ${_LIBSWIFT_EXPERIMENTAL_REGEX_SOURCES})
file(TO_CMAKE_PATH "${source}" source)
list(APPEND LIBSWIFT_EXPERIMENTAL_REGEX_SOURCES ${source})
endforeach()
message(STATUS "Using Experimental String Processing library for libswift ExperimentalRegex (${EXPERIMENTAL_STRING_PROCESSING_SOURCE_DIR}).")

add_libswift_module(ExperimentalRegex
Regex.swift
)
"${LIBSWIFT_EXPERIMENTAL_REGEX_SOURCES}"
Regex.swift)

0 comments on commit 7bff9da

Please sign in to comment.