Skip to content

Commit

Permalink
Merge pull request #4222 from 9rnsr/fix13907
Browse files Browse the repository at this point in the history
Issue 13907 - Surrogate pairs in wchar string literal will cause incorrect length match
  • Loading branch information
WalterBright committed Dec 31, 2014
2 parents 5544787 + 6d73218 commit 4e21e56
Show file tree
Hide file tree
Showing 6 changed files with 115 additions and 40 deletions.
19 changes: 12 additions & 7 deletions src/cast.c
Original file line number Diff line number Diff line change
Expand Up @@ -577,15 +577,19 @@ MATCH implicitConvTo(Expression *e, Type *t)
case Tsarray:
if (e->type->ty == Tsarray)
{
if (((TypeSArray *)e->type)->dim->toInteger() !=
((TypeSArray *)t)->dim->toInteger())
return;
TY tynto = t->nextOf()->ty;
if (tynto == tyn)
{
result = MATCHexact;
if (((TypeSArray *)e->type)->dim->toInteger() ==
((TypeSArray *)t)->dim->toInteger())
{
result = MATCHexact;
}
return;
}
int szto = t->nextOf()->size();
if (e->length(szto) != ((TypeSArray *)t)->dim->toInteger())
return;
if (!e->committed && (tynto == Tchar || tynto == Twchar || tynto == Tdchar))
{
result = MATCHexact;
Expand All @@ -594,10 +598,10 @@ MATCH implicitConvTo(Expression *e, Type *t)
}
else if (e->type->ty == Tarray)
{
if (e->length() >
((TypeSArray *)t)->dim->toInteger())
return;
TY tynto = t->nextOf()->ty;
int sznto = t->nextOf()->size();
if (e->length(sznto) != ((TypeSArray *)t)->dim->toInteger())
return;
if (tynto == tyn)
{
result = MATCHexact;
Expand All @@ -609,6 +613,7 @@ MATCH implicitConvTo(Expression *e, Type *t)
return;
}
}
/* fall through */
case Tarray:
case Tpointer:
Type *tn = t->nextOf();
Expand Down
36 changes: 22 additions & 14 deletions src/expression.c
Original file line number Diff line number Diff line change
Expand Up @@ -3770,45 +3770,53 @@ Expression *StringExp::semantic(Scope *sc)
}

/**********************************
* Return length of string.
* Return the code unit count of string.
* Input:
* encSize code unit size of the target encoding.
*/

size_t StringExp::length()
size_t StringExp::length(int encSize)
{
assert(encSize == 1 || encSize == 2 || encSize == 4);
if (sz == encSize)
return len;

size_t result = 0;
dchar_t c;
const char *p;

switch (sz)
{
case 1:
for (size_t u = 0; u < len;)
{
p = utf_decodeChar((utf8_t *)string, len, &u, &c);
if (p)
{ error("%s", p);
if (const char *p = utf_decodeChar((utf8_t *)string, len, &u, &c))
{
error("%s", p);
return 0;
}
else
result++;
result += utf_codeLength(encSize, c);
}
break;

case 2:
for (size_t u = 0; u < len;)
{
p = utf_decodeWchar((unsigned short *)string, len, &u, &c);
if (p)
{ error("%s", p);
if (const char *p = utf_decodeWchar((utf16_t *)string, len, &u, &c))
{
error("%s", p);
return 0;
}
else
result++;
result += utf_codeLength(encSize, c);
}
break;

case 4:
result = len;
for (size_t u = 0; u < len;)
{
c = *((utf32_t *)((char *)string + u));
u += 4;
result += utf_codeLength(encSize, c);
}
break;

default:
Expand Down
2 changes: 1 addition & 1 deletion src/expression.h
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ class StringExp : public Expression
static StringExp *create(Loc loc, char *s);
bool equals(RootObject *o);
Expression *semantic(Scope *sc);
size_t length();
size_t length(int encSize = 4);
StringExp *toStringExp();
StringExp *toUTF8(Scope *sc);
int compare(RootObject *obj);
Expand Down
13 changes: 8 additions & 5 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -873,12 +873,15 @@ Initializer *ExpInitializer::semantic(Scope *sc, Type *t, NeedInterpret needInte
* Allow this by doing an explicit cast, which will lengthen the string
* literal.
*/
if (exp->op == TOKstring && tb->ty == Tsarray && ti->ty == Tsarray)
if (exp->op == TOKstring && tb->ty == Tsarray)
{
StringExp *se = (StringExp *)exp;
if (!se->committed && se->type->ty == Tsarray &&
((TypeSArray *)se->type)->dim->toInteger() <
((TypeSArray *)t)->dim->toInteger())
Type *typeb = se->type->toBasetype();
TY tynto = tb->nextOf()->ty;
if (!se->committed &&
(typeb->ty == Tarray || typeb->ty == Tsarray) &&
(tynto == Tchar || tynto == Twchar || tynto == Tdchar) &&
se->length(tb->nextOf()->size()) < ((TypeSArray *)tb)->dim->toInteger())
{
exp = se->castTo(sc, t);
goto L1;
Expand Down Expand Up @@ -951,9 +954,9 @@ Initializer *ExpInitializer::semantic(Scope *sc, Type *t, NeedInterpret needInte
}
exp = exp->implicitCastTo(sc, t);
}
L1:
if (exp->op == TOKerror)
return this;
L1:
if (needInterpret)
exp = exp->ctfeInterpret();
else
Expand Down
42 changes: 33 additions & 9 deletions src/struct.c
Original file line number Diff line number Diff line change
Expand Up @@ -1023,22 +1023,46 @@ bool StructDeclaration::fit(Loc loc, Scope *sc, Expressions *elements, Type *sty
}
offset = (unsigned)(v->offset + v->type->size());

Type *telem = v->type;
Type *t = v->type;
if (stype)
telem = telem->addMod(stype->mod);
Type *origType = telem;
while (!e->implicitConvTo(telem) && telem->toBasetype()->ty == Tsarray)
t = t->addMod(stype->mod);
Type *origType = t;
Type *tb = t->toBasetype();

/* Look for case of initializing a static array with a too-short
* string literal, such as:
* char[5] foo = "abc";
* Allow this by doing an explicit cast, which will lengthen the string
* literal.
*/
if (e->op == TOKstring && tb->ty == Tsarray)
{
StringExp *se = (StringExp *)e;
Type *typeb = se->type->toBasetype();
TY tynto = tb->nextOf()->ty;
if (!se->committed &&
(typeb->ty == Tarray || typeb->ty == Tsarray) &&
(tynto == Tchar || tynto == Twchar || tynto == Tdchar) &&
se->length(tb->nextOf()->size()) < ((TypeSArray *)tb)->dim->toInteger())
{
e = se->castTo(sc, t);
goto L1;
}
}

while (!e->implicitConvTo(t) && tb->ty == Tsarray)
{
/* Static array initialization, as in:
* T[3][5] = e;
*/
telem = telem->toBasetype()->nextOf();
t = tb->nextOf();
tb = t->toBasetype();
}
if (!e->implicitConvTo(t))
t = origType; // restore type for better diagnostic

if (!e->implicitConvTo(telem))
telem = origType; // restore type for better diagnostic

e = e->implicitCastTo(sc, telem);
e = e->implicitCastTo(sc, t);
L1:
if (e->op == TOKerror)
return false;

Expand Down
43 changes: 39 additions & 4 deletions test/runnable/literal.d
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
// REQUIRED_ARGS: -d
// PERMUTE_ARGS: -dw

import std.stdio;
import core.stdc.stdlib;
extern(C) int printf(const char*, ...);

enum
{
Expand Down Expand Up @@ -167,10 +164,48 @@ void test2()
assert(e == 2_463_534_242UL);
}

/***************************************************/
// 13907

// Helpers for issue 13907. Each takes a wchar static array of a different
// fixed length; test13907 uses them to check for which lengths a given
// string literal argument is accepted.
void f13907_1(wchar[1] a) {}
void f13907_2(wchar[2] a) {}
void f13907_3(wchar[3] a) {}

// Overload pair that differs only in the static array length of the
// parameter; the returned value identifies which overload was selected.
auto f13907_12(char[1]) { return 1; }
auto f13907_12(char[2]) { return 2; }

// Test for issue 13907: matching a string literal against a static array
// parameter, plus regression checks for static array initializers that
// lengthen a too-short string literal.
void test13907()
{
// "\U00010000" is a single code point outside the Basic Multilingual Plane,
// so in UTF-16 it is encoded as a surrogate pair (two wchar code units).
// Only the wchar[2] parameter may accept it, with or without the `w` suffix;
// wchar[1] and wchar[3] must both be rejected.
static assert(!__traits(compiles, { f13907_1("\U00010000"w); }));
static assert(!__traits(compiles, { f13907_1("\U00010000" ); }));
f13907_2("\U00010000"w);
f13907_2("\U00010000");
static assert(!__traits(compiles, { f13907_3("\U00010000"w); }));
static assert(!__traits(compiles, { f13907_3("\U00010000" ); }));

// A one-character literal must select the char[1] overload, not char[2].
assert(f13907_12("a") == 1);

// regression tests for the lengthen behavior in initializer
// p is declared with pointer type const(char*), not as an array; it is used
// below to check that a pointer cannot initialize a static array.
enum const(char*) p = "hello world";
static assert(!__traits(compiles, { static char[5] a = "hello world"; })); // truncation is not allowed
// Target element type is not char/wchar/dchar: must be rejected.
static assert(!__traits(compiles, { static void[20] a = "hello world"; }));
static assert(!__traits(compiles, { static int[20] a = "hello world"; }));
// Literal carries an explicit suffix (w/d/c) naming a different character
// type than the target array's element type: must be rejected.
static assert(!__traits(compiles, { static char[20] a = "hello world"w; }));
static assert(!__traits(compiles, { static wchar[20] a = "hello world"d; }));
static assert(!__traits(compiles, { static dchar[20] a = "hello world"c; }));
// Initializer has pointer type rather than array type: must be rejected.
static assert(!__traits(compiles, { static char[20] a = p; }));
// An unsuffixed literal shorter than the target may initialize a char,
// wchar or dchar static array.
static char[20] csa = "hello world"; // extending is allowed
static wchar[20] wsa = "hello world"; // ok
static dchar[20] dsa = "hello world"; // ok
}

/***************************************************/

int main()
{
test1();
test2();
test13907();

printf("Success\n");
return 0;
Expand Down

0 comments on commit 4e21e56

Please sign in to comment.