311 changes: 311 additions & 0 deletions src/dmd/parse.d
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import dmd.root.outbuffer;
import dmd.root.rmem;
import dmd.root.rootobject;
import dmd.tokens;
import dmd.utf;

// How multiple declarations are parsed.
// If 1, treat as C.
Expand Down Expand Up @@ -7745,6 +7746,13 @@ final class Parser(AST) : Lexer
break;

case TOK.string_:
if (global.params.interpolateStrings && token.ptr[0] == 'i')
{
e = parseInterpolatedString(token);
nextToken();
break;
}
goto case TOK.hexadecimalString;
case TOK.hexadecimalString:
{
// cat adjacent strings
Expand All @@ -7763,6 +7771,10 @@ final class Parser(AST) : Lexer
error("mismatched string literal postfixes `'%c'` and `'%c'`", postfix, token.postfix);
postfix = token.postfix;
}
if (token.ptr[0] == 'i')
{
error("cannot implicitly concatenate interpolated strings with non-interpolated strings");
}

error("Implicit string concatenation is deprecated, use %s ~ %s instead",
prev.toChars(), token.toChars());
Expand Down Expand Up @@ -8996,6 +9008,305 @@ final class Parser(AST) : Lexer
token.lineComment = null;
}
}

/**
Parse the given interpolated string into a tuple of expressions.

The processed string contents (`token.ustring`) are walked character by
character. Every run of raw characters becomes a `StringExp` that carries the
token's postfix, and every non-empty `$(...)` body is parsed as an expression
by a temporary `Parser`. A `$$` pair in the raw text is emitted as a single
`$`. Any other use of `$` is reported as an error and stops the scan; the
parts collected up to that point are still returned.

Params:
    token = the interpolated string token
Returns:
    A tuple of expressions from the interpolated string.
*/
AST.TupleExp parseInterpolatedString(Token token)
in { assert(token.value == TOK.string_ && token.ptr[0] == 'i'); } body
{
    //fprintf(stderr, "parseInterpolatedString `%.*s`\n", token.len, token.ustring);

    auto parts = new AST.Expressions();

    // Used to map string contents back to original source location
    auto sourcePos = StringSourcePos(token.ptr);

    auto str = token.ustring;
    auto len = token.len;
    size_t mark = 0;                  // start of the pending run of raw characters
    auto markSourcePos = sourcePos;   // source position that corresponds to `mark`
    size_t next = 0;                  // index of the next unread character of `str`
    size_t doubleDollar = size_t.max; // index of the first `$$` in the pending run (max = none)

KmainLoop:
    while(true)
    {
        // Everything in [mark, endOfRawCharacters) is raw text if a '$' is found below.
        auto endOfRawCharacters = next;
        if (next < len)
        {
            auto nextChar = sourcePos.scan(str, next);
            if (nextChar != '$')
                continue;

            if (next >= len)
            {
                error(sourcePos.loc.from(token.loc), "unfinished interpolated string expression '$'");
                break;
            }
            if (str[next] == '$') // handle $$
            {
                // Only the first `$$` is remembered; later ones are found again when copying.
                if (doubleDollar == size_t.max)
                    doubleDollar = next - 1;
                sourcePos.scan(str, next);
                continue;
            }
        }

        // Add next string expression
        if (endOfRawCharacters > mark)
        {
            auto markLoc = markSourcePos.loc.from(token.loc);
            if (doubleDollar == size_t.max)
            {
                // No `$$` in this run: reference the token's string data directly.
                parts.push(new AST.StringExp(markLoc, cast(char*)str + mark, endOfRawCharacters - mark, token.postfix));
            }
            else
            {
                // At least one `$$` collapses to `$`, so the copy is at least one byte shorter.
                auto buffer = cast(char*)mem.xmalloc(endOfRawCharacters - 1 - mark);
                size_t offset;
                {
                    // Copy everything up to and including the first '$' of the first pair.
                    auto length = doubleDollar + 1 - mark;
                    buffer[0 .. length] = str[mark .. doubleDollar + 1];
                    offset = length;
                }
                // Resume after the second '$' of the first pair, collapsing any further pairs.
                for (size_t i = doubleDollar + 2; i < endOfRawCharacters; i++)
                {
                    auto c = str[i];
                    if (c == '$')
                    {
                        i++;
                        assert(i < endOfRawCharacters && str[i] == '$');
                    }
                    buffer[offset++] = c;
                }
                parts.push(new AST.StringExp(markLoc, buffer, offset, token.postfix));
                doubleDollar = size_t.max;
            }
        }

        if (next >= len)
            break;

        // Process the '$' expression
        if (str[next] == '(')
        {
            sourcePos.scan(str, next);
            mark = next;
            // Remember where the expression text begins. The temporary parser
            // counts lines forward from its starting location, so it has to be
            // seeded with the start of the expression, not with the position
            // reached after the closing ')' has been scanned.
            const exprStartLoc = sourcePos.loc;
            // Find the matching ')' by counting nested parentheses.
            for(uint depth = 1;;)
            {
                if (next >= len)
                {
                    error(sourcePos.loc.from(token.loc), "unfinished interpolated string expression '$(...)'");
                    break KmainLoop;
                }
                auto nextChar = sourcePos.scan(str, next);
                if (nextChar == ')')
                {
                    depth--;
                    if (depth == 0)
                        break;
                }
                else if (nextChar == '(')
                {
                    depth++;
                }
            }
            {
                auto writeableStr = cast(char*)str;
                // Need to null-terminate so the parser does not scan past the end of
                // the expression. A case has been found where the parser will scan
                // past the expression without this, namely, i"$(var)'"
                writeableStr[next - 1] = '\0';
                scope(exit) writeableStr[next - 1] = ')';

                auto expr = str[mark .. next - 1];
                //printf("parsing the expression '%s'\n", expr.ptr);
                scope tempParser = new Parser!AST(mod, expr, false, diagnosticReporter);
                tempParser.scanloc = exprStartLoc.from(token.loc);
                tempParser.nextToken();
                // An empty (or comment-only) expression contributes nothing to the tuple.
                if (tempParser.token.value != TOK.endOfFile)
                {
                    auto result = tempParser.parseExpression();
                    if (tempParser.token.value != TOK.endOfFile)
                    {
                        error(sourcePos.loc.from(token.loc), "invalid expression '%s' inside interpolated string", expr.ptr);
                        break;
                    }
                    parts.push(result);
                }
            }
            mark = next;
            markSourcePos = sourcePos;
        }
        else
        {
            // TODO: if we want to support `$` expressions without parentheses, this is
            // where we would add support for it. Maybe a good grammar node for this
            // would be DotIdentifier.
            error(sourcePos.loc.from(token.loc), "missing parentheses in interpolated string expression '$(...)'");
            break;
        }
    }

    return new AST.TupleExp(token.loc, parts);
}

/**
Represents a location offset from a `Loc`.
*/
static struct LocOffset
{
    uint offset;         /// byte offset from base loc
    uint line;           /// line offset from base loc
    uint lastLineOffset; /// byte offset of the start of the last line

    /**
    Indicates 'offset' has been moved to the next line.
    */
    void atNextLine()
    {
        line++;
        lastLineOffset = offset;
    }

    /**
    Get location relative to `baseLoc`.
    Params:
        baseLoc = relative base location
    Returns:
        location relative to `baseLoc`
    */
    Loc from(ref const Loc baseLoc) const
    {
        // On the base line the column is simply shifted from the base column.
        // On any later line it is counted from the start of that line; add 1
        // so the result is 1-based like `Loc.charnum` (the first character of
        // a line is column 1, not 0).
        const column = (line == 0)
            ? baseLoc.charnum + offset
            : offset - lastLineOffset + 1;
        return Loc(baseLoc.filename, baseLoc.linnum + line, column);
    }
}

/**
Used to map offsets in a processed string back to the source location.

The struct walks the raw source text of an interpolated string token in step
with its processed contents, tracking in `loc` how far (in bytes and lines)
the current position is from the start of the token.
*/
private static struct StringSourcePos
{
    private const(char)* sourcePtr; // start of the token in the source text (the 'i')
    private bool wysiwyg;           // true for every literal form except i"..."
    LocOffset loc;                  // current position relative to the token start

    /**
    Position the cursor on the first content character of the literal.
    Params:
        sourcePtr = pointer to the leading 'i' of the interpolated string token
    */
    this(const char* sourcePtr)
    {
        this.sourcePtr = sourcePtr;
        loc.offset = 2; // skip the 'i' and the opening quote character
        if (sourcePtr[1] == '"')
            wysiwyg = false;
        else
        {
            wysiwyg = true;
            if (sourcePtr[1] == 'r') // ir"
                loc.offset++;
            else if (sourcePtr[1] == '`') // i`
            { }
            else
            {
                assert(sourcePtr[1] == 'q', "code bug");
                loc.offset = 3; // iq{
                if (sourcePtr[2] != '{')
                {
                    assert(sourcePtr[2] == '"', "code bug");
                    char c = sourcePtr[3];
                    bool isheredoc;
                    if (c >= 0x80)
                    {
                        import dmd.utf : utf_decodeChar, isUniAlpha;
                        size_t tempOffset = 3;
                        dchar fullChar;
                        // The decode must run in every build: an assert expression is
                        // not evaluated in release builds, so only its result is asserted.
                        auto decodeError = utf_decodeChar(sourcePtr, size_t.max, tempOffset, fullChar);
                        assert(!decodeError, "code bug");
                        isheredoc = isUniAlpha(fullChar);
                    }
                    else
                    {
                        import core.stdc.ctype : isalpha;
                        isheredoc = (isalpha(c) || c == '_');
                    }
                    if (isheredoc)
                        // iq"IDENT\n : the content starts right after the first newline
                        loc.offset = 3 + cast(uint)(strchr(sourcePtr + 3, '\n') - (sourcePtr + 3)) + 1;
                    else
                        // iq"X : the content starts after the single delimiter character
                        loc.offset = 4;
                }
            }
        }
    }

    /**
    Read and move past the next character both in the source string and
    in the processed string.
    Params:
        str = the processed string
        ridx = in/out index into the processed string
    Returns:
        the next character
    */
    dchar scan(const(char)* str, ref size_t ridx)
    {
        dchar sourceChar = sourcePtr[loc.offset++];
        dchar strChar;
        if (sourceChar == '\r')
        {
            // A "\r\n" pair in the source is matched against a single '\n' in `str`.
            // NOTE(review): a lone '\r' is neither counted as a line break nor
            // normalized here - confirm this matches how the lexer stores it.
            if (sourcePtr[loc.offset] == '\n')
            {
                loc.offset++;
                loc.atNextLine();
                sourceChar = '\n';
            }
            strChar = str[ridx++];
        }
        else if (wysiwyg || sourceChar != '\\')
        {
            strChar = str[ridx++];
            if (sourceChar == '\n')
                loc.atNextLine();
        }
        else
        {
            // Escape sequence in a double-quoted literal: re-decode it from the
            // source to find out how many source bytes it spans. Diagnostics are
            // discarded here.
            scope ignore = new IgnoreDiagnosticReporter();
            auto escapeStart = sourcePtr + loc.offset;
            if (escapeStart[0] == 'u' || escapeStart[0] == 'U' || escapeStart[0] == '&')
            {
                // These escapes can expand to a multi-byte UTF-8 sequence in `str`.
                // The decode advances `ridx` and sets `strChar`, so it must not live
                // inside the assert, which is not evaluated in release builds.
                auto decodeError = utf_decodeChar(str, size_t.max, ridx, strChar);
                assert(!decodeError, "code bug");
            }
            else
                strChar = str[ridx++];

            auto escapeEnd = escapeStart;
            auto tempLoc = Loc(); // TODO: get the current location correctly
            sourceChar = Lexer.escapeSequence(tempLoc, ignore, escapeEnd);
            loc.offset += (escapeEnd - escapeStart);
        }
        assert(strChar == sourceChar/*, "strChar `" ~ strChar ~ "' != sourceChar '" ~ sourceChar ~ "'"*/);
        return strChar;
    }
}
/**
Diagnostic reporter that prints nothing: every diagnostic it receives is
only tallied in the matching counter.
*/
private static class IgnoreDiagnosticReporter : DiagnosticReporter
{
    import core.stdc.stdarg : va_list;

    private int _errorCount;
    private int _warningCount;
    private int _deprecationCount;

    /// Number of errors (including supplemental ones) received so far.
    override int errorCount()
    {
        return _errorCount;
    }

    /// Number of warnings (including supplemental ones) received so far.
    override int warningCount()
    {
        return _warningCount;
    }

    /// Number of deprecations (including supplemental ones) received so far.
    override int deprecationCount()
    {
        return _deprecationCount;
    }

    /// Count an error; the message is discarded.
    override void error(const ref Loc loc, const(char)* format, va_list args)
    {
        ++_errorCount;
    }

    /// Count a supplemental error; the message is discarded.
    override void errorSupplemental(const ref Loc loc, const(char)* format, va_list)
    {
        ++_errorCount;
    }

    /// Count a warning; the message is discarded.
    override void warning(const ref Loc loc, const(char)* format, va_list args)
    {
        ++_warningCount;
    }

    /// Count a supplemental warning; the message is discarded.
    override void warningSupplemental(const ref Loc loc, const(char)* format, va_list)
    {
        ++_warningCount;
    }

    /// Count a deprecation; the message is discarded.
    override void deprecation(const ref Loc loc, const(char)* format, va_list args)
    {
        ++_deprecationCount;
    }

    /// Count a supplemental deprecation; the message is discarded.
    override void deprecationSupplemental(const ref Loc loc, const(char)* format, va_list)
    {
        ++_deprecationCount;
    }
}
}

enum PREC : int
Expand Down
4 changes: 2 additions & 2 deletions test/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,8 @@ $(RESULTS_DIR)/d_do_test$(EXE): tools/d_do_test.d $(RESULTS_DIR)/.created
@echo "OS: '$(OS)'"
@echo "MODEL: '$(MODEL)'"
@echo "PIC: '$(PIC_FLAG)'"
$(DMD) -conf= $(MODEL_FLAG) $(DEBUG_FLAGS) -unittest -run $<
$(DMD) -conf= $(MODEL_FLAG) $(DEBUG_FLAGS) -od$(RESULTS_DIR) -of$(RESULTS_DIR)$(DSEP)d_do_test$(EXE) $<
$(DMD) -conf= $(MODEL_FLAG) $(DEBUG_FLAGS) -transition=interpolate -unittest -run $<
$(DMD) -conf= $(MODEL_FLAG) $(DEBUG_FLAGS) -transition=interpolate -od$(RESULTS_DIR) -of$(RESULTS_DIR)$(DSEP)d_do_test$(EXE) $<

$(RESULTS_DIR)/sanitize_json$(EXE): tools/sanitize_json.d $(RESULTS_DIR)/.created
@echo "Building sanitize_json tool"
Expand Down
33 changes: 33 additions & 0 deletions test/fail_compilation/istring1.d
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
PERMUTE_ARGS:
REQUIRED_ARGS: -transition=interpolate
TEST_OUTPUT:
---
fail_compilation/istring1.d(16): Error: missing parentheses in interpolated string expression '$(...)'
fail_compilation/istring1.d(23): Error: unfinished interpolated string expression '$(...)'
fail_compilation/istring1.d(26): Error: unfinished interpolated string expression '$'
fail_compilation/istring1.d(29): Error: invalid expression '1 + 2;' inside interpolated string
fail_compilation/istring1.d(32): Error: undefined escape sequence \c
fail_compilation/istring1.d(33): Error: unterminated named entity &quot";
---
*/
enum s1 = i`

$!

`;
enum s2 = i`

$(

`;
enum s3 = i`

$`;
enum s4 = i`

$(1 + 2;)`;

// Test that bad escape sequences are handled sanely
enum s5 = i"\c";
enum s6 = i"\&quot";
11 changes: 11 additions & 0 deletions test/fail_compilation/istring2.d
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/*
PERMUTE_ARGS:
REQUIRED_ARGS: -transition=interpolate
TEST_OUTPUT:
---
fail_compilation/istring2.d(11): Error: undefined identifier `a`
---
*/
enum s1 = i`

$(a)`;
2 changes: 2 additions & 0 deletions test/run.d
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ Options:

// bootstrap all needed environment variables
auto env = getEnvironment;
hostDMD = buildPath("..", "generated", env["OS"], env["BUILD"], env["MODEL"], "dmd" ~ env["EXE"]);

if (runUnitTests)
{
Expand Down Expand Up @@ -162,6 +163,7 @@ void ensureToolsExists(const TestTool[] tools ...)
const command = [
hostDMD,
"-of"~targetBin,
"-transition=interpolate",
sourceFile
] ~ tool.extraArgs;

Expand Down
72 changes: 72 additions & 0 deletions test/runnable/istring.d
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
PERMUTE_ARGS:
REQUIRED_ARGS: -transition=interpolate
*/
import std.conv : text;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should try to avoid using Phobos in the DMD testsuite. Maybe we can add a very reduced and minimalistic version of text?


// An interpolated string that consists only of an empty `$(...)`, or of a
// `$(...)` holding nothing but a comment, must produce an empty tuple.
static assert(i"$()".length == 0);
static assert(i"$(/* a comment!*/)".length == 0);
static assert(i"$(// another comment)".length == 0);
static assert(i"$(/+ yet another comment+/)".length == 0);

// Runtime checks: interpolated strings are passed to `text` (or indexed as a
// tuple) and compared against the expected plain string.
void main()
{
// basic interpolation of a variable and of an arithmetic expression
int a = 42;
assert("a is 42" == text(i"a is $(a)"));
assert("a + 23 is 65" == text(i"a + 23 is $(a + 23)"));

// test each type of string literal
int b = 93;
assert("42 + 93 = 135" == text( i"$(a) + $(b) = $(a + b)")); // double-quote
assert("42 + 93 = 135" == text( ir"$(a) + $(b) = $(a + b)")); // wysiwyg
assert("42 + 93 = 135" == text( i`$(a) + $(b) = $(a + b)`)); // wysiwyg (alt)
assert("42 + 93 = 135" == text( iq{$(a) + $(b) = $(a + b)})); // token
assert("42 + 93 = 135" == text(iq"!$(a) + $(b) = $(a + b)!")); // delimited (char)
assert("42 + 93 = 135\n" == text(iq"ABC
$(a) + $(b) = $(a + b)
ABC")); // delimited (heredoc)

// Escaping double dollar
// `[0]` selects the first (and here only) element of the resulting tuple
assert("$" == i"$$"[0]);
assert(" $ " == i" $$ "[0]);
assert(" $(just raw string) " == i" $$(just raw string) "[0]);
assert("Double dollar $$ becomes $" == text( i"Double dollar $$$$ becomes $$")); // double-quote
assert("Double dollar $$ becomes $" == text( ir"Double dollar $$$$ becomes $$")); // wysiwyg
assert("Double dollar $$ becomes $" == text( i`Double dollar $$$$ becomes $$`)); // wysiwyg (alt)
assert("Double dollar $$ becomes $" == text( iq{Double dollar $$$$ becomes $$})); // token
assert("Double dollar $$ becomes $" == text(iq"!Double dollar $$$$ becomes $$!")); // delimited

// `add` is generated at module scope by the `mixin(funcCode(...))` in this file
assert(928 == add(900, 28));
}

string funcCode(string attributes, string returnType, string name, string args, string body)
{
return text(iq{
$(attributes) $(returnType) $(name)($(args))
{
$(body)
}
});
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps add tests with other string literal syntaxes as well.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And probably the suffixes too: i"foo $(bar)"w should translate to ("foo "w, bar) or something similar; just repeat the suffix on the generated string literals inside.

}
mixin(funcCode("pragma(inline)", "int", "add", "int a, int b", "return a + b;"));

// Test interpolated strings with escape sequences.
// None of these literals contains a `$(...)`, so each must yield a tuple with
// exactly one element (the string itself).
static assert(i" foo \n bar".length == 1);
static assert(i"foo \x0a bar".length == 1);
static assert(i"foo \xC2\xA2 bar".length == 1);
static assert(i"foo \u042f bar".length == 1);
static assert(i"foo \U00010f063 bar".length == 1);
static assert(i"foo \0 bar".length == 1);
static assert(i"foo \1 bar".length == 1);
static assert(i"foo \7 bar".length == 1);
static assert(i"foo \01 bar".length == 1);
static assert(i"foo \001 bar".length == 1);
static assert(i"foo \377 bar".length == 1);
// NOTE(review): there is no backslash before `&quot;` here, so this looks like
// plain text rather than the `\&quot;` named-entity escape - confirm which was intended.
static assert(i"foo &quot; bar".length == 1);

// Test string literals with odd newlines
static assert(i"
".length == 1);
// test carriage return
static assert(i"
".length == 1);
33 changes: 17 additions & 16 deletions test/tools/d_do_test.d
Original file line number Diff line number Diff line change
Expand Up @@ -519,8 +519,7 @@ string envGetRequired(in char[] name)
auto value = environment.get(name);
if(value is null)
{
writefln("Error: missing environment variable '%s', was this called this through the Makefile?",
name);
writeln(i"Error: missing environment variable '$(name)', was this called this through the Makefile?");
throw new SilentQuit();
}
return value;
Expand Down Expand Up @@ -733,13 +732,13 @@ int tryMain(string[] args)
string objfile = output_dir ~ envData.sep ~ test_name ~ "_" ~ to!string(permuteIndex) ~ envData.obj;
toCleanup ~= objfile;

command = format("%s -conf= -m%s -I%s %s %s -od%s -of%s %s %s%s %s", envData.dmd, envData.model, input_dir,
reqArgs, permutedArgs, output_dir,
(testArgs.mode == TestMode.RUN || testArgs.link ? test_app_dmd : objfile),
argSet,
(testArgs.mode == TestMode.RUN || testArgs.link ? "" : "-c "),
join(testArgs.sources, " "),
(autoCompileImports ? "-i" : join(testArgs.compiledImports, " ")));
command = text(
i"$(envData.dmd) -conf= -m$(envData.model) -I$(input_dir) $(reqArgs) ",
i"$(permutedArgs) -od$(output_dir) -of",
(testArgs.mode == TestMode.RUN || testArgs.link) ? test_app_dmd : objfile,
i` $(argSet) $(testArgs.mode == TestMode.RUN || testArgs.link ? "" : "-c ") `,
join(testArgs.sources, " "), " ",
(autoCompileImports ? "-i" : join(testArgs.compiledImports, " ")));
version(Windows) command ~= " -map nul.map";

compile_output = execute(fThisRun, command, testArgs.mode != TestMode.FAIL_COMPILE, result_path);
Expand All @@ -751,18 +750,21 @@ int tryMain(string[] args)
string newo= result_path ~ replace(replace(filename, ".d", envData.obj), envData.sep~"imports"~envData.sep, envData.sep);
toCleanup ~= newo;

command = format("%s -conf= -m%s -I%s %s %s -od%s -c %s %s", envData.dmd, envData.model, input_dir,
reqArgs, permutedArgs, output_dir, argSet, filename);
command = text(
i"$(envData.dmd) -conf= -m$(envData.model) -I$(input_dir) $(reqArgs) ",
i"$(permutedArgs) -od$(output_dir) -c $(argSet) $(filename)");
compile_output ~= execute(fThisRun, command, testArgs.mode != TestMode.FAIL_COMPILE, result_path);
}

if (testArgs.mode == TestMode.RUN || testArgs.link)
{
// link .o's into an executable
command = format("%s -conf= -m%s%s%s %s %s -od%s -of%s %s", envData.dmd, envData.model,
command = text(
i"$(envData.dmd) -conf= -m$(envData.model)",
autoCompileImports ? " -i" : "",
autoCompileImports ? "extraSourceIncludePaths" : "",
envData.required_args, testArgs.requiredArgsForLink, output_dir, test_app_dmd, join(toCleanup, " "));
i" $(envData.required_args) $(testArgs.requiredArgsForLink) -od$(output_dir)",
i" -of$(test_app_dmd) $(join(toCleanup, ` `))");
version(Windows) command ~= " -map nul.map";

execute(fThisRun, command, true, result_path);
Expand Down Expand Up @@ -870,11 +872,10 @@ int tryMain(string[] args)
}
f.writeln();
f.writeln("==============================");
f.writef("Test %s failed: ", input_file);
f.writeln(e.msg);
f.writeln(i"Test $(input_file) failed: $(e.msg)");
f.close();

writefln("\nTest %s failed. The logged output:", input_file);
writeln("\nTest $(input_file) failed. The logged output:");
auto outputText = output_file.readText;
writeln(outputText);
output_file.remove();
Expand Down