Skip to content

Commit

Permalink
Fix <rdar://14296004> [QoI] Poor diagnostic/recovery when two operato…
Browse files Browse the repository at this point in the history
…rs (e.g., == and -) are adjacted without spaces.

This is a frequently reported and surprising issue where lack of whitespace leads to
rejecting common code like "X*-4".  Fix this by diagnosing it specifically as a lack
of whitespace problem, including a fixit to insert the missing whitespace (to transform
it into "X * -4".  This even handles the cases where there are multiple valid (single)
splits possible by emitting a series of notes.
  • Loading branch information
lattner committed Dec 16, 2015
1 parent d221401 commit e28c2e2
Show file tree
Hide file tree
Showing 5 changed files with 189 additions and 16 deletions.
18 changes: 15 additions & 3 deletions include/swift/AST/DiagnosticsSema.def
Expand Up @@ -420,16 +420,28 @@ ERROR(ambiguous_module_type,sema_nb,none,
ERROR(use_nonmatching_operator,sema_nb,none,
"%0 is not a %select{binary|prefix unary|postfix unary}1 operator",
(Identifier, unsigned))

ERROR(unspaced_operators_fixit,sema_nb,none,
"missing whitespace between %0 and %1 operators",
(Identifier, Identifier, bool))
ERROR(unspaced_operators,sema_nb,none,
"ambiguous missing whitespace between unary and binary operators", ())
NOTE(unspaced_operators_candidate,sema_nb,none,
"could be %select{binary|postfix}2 %0 and %select{prefix|binary}2 %1",
(Identifier, Identifier, bool))



ERROR(use_unresolved_identifier,sema_nb,none,
"use of unresolved %select{identifier|operator}1 %0", (Identifier, bool))
ERROR(use_undeclared_type,sema_nb,none,
"use of undeclared type %0", (Identifier))
ERROR(use_undeclared_type_did_you_mean,sema_nb,none,
"use of undeclared type %0; did you mean to use '%1'?", (Identifier, StringRef))
"use of undeclared type %0; did you mean to use '%1'?", (Identifier, StringRef))
NOTE(note_remapped_type,sema_nb,none,
"did you mean to use '%0'?", (StringRef))
"did you mean to use '%0'?", (StringRef))
ERROR(identifier_init_failure,sema_nb,none,
"could not infer type for %0", (Identifier))
"could not infer type for %0", (Identifier))
ERROR(pattern_used_in_type,sema_nb,none,
"%0 used within its own type", (Identifier))
NOTE(note_module_as_type,sema_nb,none,
Expand Down
13 changes: 13 additions & 0 deletions include/swift/Parse/Lexer.h
Expand Up @@ -240,6 +240,15 @@ class Lexer {
restoreState(S);
}

/// \brief Retrieve the Token referred to by \c Loc.
///
/// \param SM The source manager in which the given source location
/// resides.
///
/// \param Loc The source location of the beginning of a token.
static Token getTokenAtLocation(const SourceManager &SM, SourceLoc Loc);


/// \brief Retrieve the source location that points just past the
/// end of the token referred to by \c Loc.
///
Expand Down Expand Up @@ -299,6 +308,10 @@ class Lexer {
/// reserved word.
static tok kindOfIdentifier(StringRef Str, bool InSILMode);

/// \brief Determines if the given string is a valid operator identifier,
/// without escaping characters.
static bool isOperator(StringRef string);

SourceLoc getLocForStartOfBuffer() const {
return SourceLoc(llvm::SMLoc::getFromPointer(BufferStart));
}
Expand Down
26 changes: 21 additions & 5 deletions lib/Parse/Lexer.cpp
Expand Up @@ -523,6 +523,18 @@ bool Lexer::isIdentifier(StringRef string) {
return p == end;
}

/// \brief Determines if the given string is a valid operator identifier,
/// without escaping characters.
bool Lexer::isOperator(StringRef string) {
if (string.empty()) return false;
char const *p = string.data(), *end = string.end();
if (!advanceIfValidStartOfOperator(p, end))
return false;
while (p < end && advanceIfValidContinuationOfOperator(p, end));
return p == end;
}


tok Lexer::kindOfIdentifier(StringRef Str, bool InSILMode) {
tok Kind = llvm::StringSwitch<tok>(Str)
#define KEYWORD(kw) \
Expand Down Expand Up @@ -1664,15 +1676,15 @@ void Lexer::lexImpl() {
}
}

SourceLoc Lexer::getLocForEndOfToken(const SourceManager &SM, SourceLoc Loc) {
Token Lexer::getTokenAtLocation(const SourceManager &SM, SourceLoc Loc) {
// Don't try to do anything with an invalid location.
if (!Loc.isValid())
return Loc;
return Token();

// Figure out which buffer contains this location.
int BufferID = SM.findBufferContainingLoc(Loc);
if (BufferID < 0)
return SourceLoc();
return Token();

// Use fake language options; language options only affect validity
// and the exact token produced.
Expand All @@ -1685,10 +1697,14 @@ SourceLoc Lexer::getLocForEndOfToken(const SourceManager &SM, SourceLoc Loc) {
Lexer L(FakeLangOpts, SM, BufferID, nullptr, /*InSILMode=*/ false,
CommentRetentionMode::ReturnAsTokens);
L.restoreState(State(Loc));
unsigned Length = L.peekNextToken().getLength();
return Loc.getAdvancedLoc(Length);
return L.peekNextToken();
}

SourceLoc Lexer::getLocForEndOfToken(const SourceManager &SM, SourceLoc Loc) {
return Loc.getAdvancedLoc(getTokenAtLocation(SM, Loc).getLength());
}


static SourceLoc getLocForStartOfTokenInBuf(SourceManager &SM,
unsigned BufferID,
unsigned Offset,
Expand Down
137 changes: 134 additions & 3 deletions lib/Sema/TypeCheckConstraints.cpp
Expand Up @@ -281,6 +281,132 @@ static bool matchesDeclRefKind(ValueDecl *value, DeclRefKind refKind) {
llvm_unreachable("bad declaration reference kind");
}

static bool containsDeclRefKind(LookupResult &lookupResult,
DeclRefKind refKind) {
for (auto candidate : lookupResult) {
ValueDecl *D = candidate.Decl;
if (!D || !D->hasType())
continue;
if (matchesDeclRefKind(D, refKind))
return true;
}
return false;
}

/// Emit a diagnostic with a fixit hint for an invalid binary operator, showing
/// how to split it according to splitCandidate.
static void diagnoseOperatorSplit(UnresolvedDeclRefExpr *UDRE,
std::pair<unsigned, bool> splitCandidate,
Diag<Identifier, Identifier, bool> diagID,
TypeChecker &TC) {

unsigned splitLoc = splitCandidate.first;
bool isBinOpFirst = splitCandidate.second;
StringRef nameStr = UDRE->getName().str();
auto startStr = nameStr.substr(0, splitLoc);
auto endStr = nameStr.drop_front(splitLoc);

// One valid split found, it is almost certainly the right answer.
auto diag = TC.diagnose(UDRE->getLoc(), diagID,
TC.Context.getIdentifier(startStr),
TC.Context.getIdentifier(endStr), isBinOpFirst);
// Highlight the whole operator.
diag.highlight(UDRE->getLoc());
// Insert whitespace on the left if the binop is at the start, or to the
// right if it is end.
if (isBinOpFirst)
diag.fixItInsert(UDRE->getLoc(), " ");
else
diag.fixItInsertAfter(UDRE->getLoc(), " ");

// Insert a space between the operators.
diag.fixItInsert(UDRE->getLoc().getAdvancedLoc(splitLoc), " ");
}

/// If we failed lookup of a binary operator, check to see it to see if
/// it is a binary operator juxtaposed with a unary operator (x*-4) that
/// needs whitespace. If so, emit specific diagnostics for it and return true,
/// otherwise return false.
static bool diagnoseJuxtaposedBinOp(UnresolvedDeclRefExpr *UDRE,
DeclContext *DC,
TypeChecker &TC) {
Identifier name = UDRE->getName();
StringRef nameStr = name.str();
if (!name.isOperator() || nameStr.size() < 2 ||
UDRE->getRefKind() != DeclRefKind::BinaryOperator)
return false;

// Relex the token, to decide whether it has whitespace around it or not. If
// it does, it isn't likely to be a case where a space was forgotten.
auto tok = Lexer::getTokenAtLocation(TC.Context.SourceMgr, UDRE->getLoc());
if (tok.getKind() != tok::oper_binary_unspaced)
return false;

// Okay, we have a failed lookup of a multicharacter unspaced binary operator.
// Check to see if lookup succeeds if a prefix or postfix operator is split
// off, and record the matches found. The bool indicated is false if the
// first half of the split is the unary operator (x!*4) or true if it is the
// binary operator (x*+4).
std::vector<std::pair<unsigned, bool>> WorkableSplits;

// Check all the potential splits.
for (unsigned splitLoc = 1, e = nameStr.size(); splitLoc != e; ++splitLoc) {
// For it to be a valid split, the start and end section must be valid
// operators, spliting a unicode code point isn't kosher.
auto startStr = nameStr.substr(0, splitLoc);
auto endStr = nameStr.drop_front(splitLoc);
if (!Lexer::isOperator(startStr) || !Lexer::isOperator(endStr))
continue;

auto startName = TC.Context.getIdentifier(startStr);
auto endName = TC.Context.getIdentifier(endStr);

// Perform name lookup for the first and second pieces. If either fail to
// be found, then it isn't a valid split.
NameLookupOptions LookupOptions = defaultUnqualifiedLookupOptions;
if (isa<AbstractFunctionDecl>(DC))
LookupOptions |= NameLookupFlags::KnownPrivate;
auto startLookup = TC.lookupUnqualified(DC, startName, UDRE->getLoc(),
LookupOptions);
if (!startLookup) continue;
auto endLookup = TC.lookupUnqualified(DC, endName, UDRE->getLoc(),
LookupOptions);
if (!endLookup) continue;

// Look to see if the candidates found could possibly match.
if (containsDeclRefKind(startLookup, DeclRefKind::PostfixOperator) &&
containsDeclRefKind(endLookup, DeclRefKind::BinaryOperator))
WorkableSplits.push_back({ splitLoc, false });

if (containsDeclRefKind(startLookup, DeclRefKind::BinaryOperator) &&
containsDeclRefKind(endLookup, DeclRefKind::PrefixOperator))
WorkableSplits.push_back({ splitLoc, true });
}

switch (WorkableSplits.size()) {
case 0:
// No splits found, can't produce this diagnostic.
return false;
case 1:
// One candidate: produce an error with a fixit on it.
diagnoseOperatorSplit(UDRE, WorkableSplits[0],
diag::unspaced_operators_fixit, TC);
return true;

default:
// Otherwise, we have to produce a series of notes listing the various
// options.
TC.diagnose(UDRE->getLoc(), diag::unspaced_operators)
.highlight(UDRE->getLoc());

for (auto candidateSplit : WorkableSplits)
diagnoseOperatorSplit(UDRE, candidateSplit,
diag::unspaced_operators_candidate, TC);
return true;
}
}


/// Bind an UnresolvedDeclRefExpr by performing name lookup and
/// returning the resultant expression. Context is the DeclContext used
/// for the lookup.
Expand All @@ -297,9 +423,14 @@ resolveDeclRefExpr(UnresolvedDeclRefExpr *UDRE, DeclContext *DC) {
auto Lookup = lookupUnqualified(DC, Name, Loc, LookupOptions);

if (!Lookup) {
diagnose(Loc, diag::use_unresolved_identifier, Name,
UDRE->getName().isOperator())
.highlight(Loc);
// If we failed lookup of a binary operator, check to see it to see if
// it is a binary operator juxtaposed with a unary operator (x*-4) that
// needs whitespace. If so, emit specific diagnostics for it.
if (!diagnoseJuxtaposedBinOp(UDRE, DC, *this)) {
diagnose(Loc, diag::use_unresolved_identifier, Name,
UDRE->getName().isOperator())
.highlight(Loc);
}
return new (Context) ErrorExpr(Loc);
}

Expand Down
11 changes: 6 additions & 5 deletions test/decl/operators.swift
Expand Up @@ -172,10 +172,9 @@ func ??= <T>(inout result : T?, rhs : Int) { // ok




_ = n*-4 // expected-error {{use of unresolved operator '*-'}}

if n==-1 {} // expected-error {{use of unresolved operator '==-'}}
// <rdar://problem/14296004> [QoI] Poor diagnostic/recovery when two operators (e.g., == and -) are adjacted without spaces.
_ = n*-4 // expected-error {{missing whitespace between '*' and '-' operators}} {{6-6= }} {{7-7= }}
if n==-1 {} // expected-error {{missing whitespace between '==' and '-' operators}} {{5-5= }} {{7-7= }}

prefix operator ☃⃠ {}
prefix func☃⃠(a : Int) -> Int { return a }
Expand All @@ -184,8 +183,10 @@ postfix func☃⃠(a : Int) -> Int { return a }

_ = n☃⃠ ☃⃠ n // Ok.
_ = n ☃⃠ ☃⃠n // Ok.
_ = n☃⃠☃⃠n // expected-error {{use of unresolved operator '☃⃠☃⃠'}}
_ = n ☃⃠☃⃠ n // expected-error {{use of unresolved operator '☃⃠☃⃠'}}
_ = n☃⃠☃⃠n // expected-error {{ambiguous missing whitespace between unary and binary operators}}
// expected-note @-1 {{could be binary '☃⃠' and prefix '☃⃠'}} {{12-12= }} {{18-18= }}
// expected-note @-2 {{could be postfix '☃⃠' and binary '☃⃠'}} {{6-6= }} {{12-12= }}



Expand Down

0 comments on commit e28c2e2

Please sign in to comment.