Showing with 120 additions and 29 deletions.
  1. +37 −11 src/e2ir.c
  2. +45 −3 src/expression.d
  3. +4 −1 src/expression.h
  4. +14 −2 src/iasm.c
  5. +3 −3 src/s2ir.c
  6. +15 −6 src/todt.c
  7. +2 −3 src/toobj.c
48 changes: 37 additions & 11 deletions src/e2ir.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ Symbol *toSymbol(Dsymbol *s);
elem *toElem(Expression *e, IRState *irs);
dt_t **Expression_toDt(Expression *e, dt_t **pdt);
Symbol *toStringSymbol(const char *str, size_t len, size_t sz);
Symbol *toStringSymbol(StringExp *se);
void toObjFile(Dsymbol *ds, bool multiobj);
Symbol *toModuleAssert(Module *m);
Symbol *toModuleUnittest(Module *m);
Expand Down Expand Up @@ -1377,14 +1378,13 @@ elem *toElem(Expression *e, IRState *irs)
Type *tb = se->type->toBasetype();
if (tb->ty == Tarray)
{
Symbol *si = toStringSymbol((const char *)se->string, se->len, se->sz);
e = el_pair(TYdarray, el_long(TYsize_t, se->len), el_ptr(si));
Symbol *si = toStringSymbol(se);
e = el_pair(TYdarray, el_long(TYsize_t, se->numberOfCodeUnits()), el_ptr(si));
}
else if (tb->ty == Tsarray)
{
Symbol *si = toStringSymbol((const char *)se->string, se->len, se->sz);
Symbol *si = toStringSymbol(se);
e = el_var(si);

e->Ejty = e->Ety = TYstruct;
e->ET = si->Stype;
e->ET->Tcount++;
Expand All @@ -1394,11 +1394,10 @@ elem *toElem(Expression *e, IRState *irs)
e = el_calloc();
e->Eoper = OPstring;
// freed in el_free
unsigned len = se->len * se->sz;
e->EV.ss.Vstring = (char *)mem_malloc(len + se->sz);
memcpy(e->EV.ss.Vstring, se->string, len);
memset(e->EV.ss.Vstring + len, 0, se->sz);
e->EV.ss.Vstrlen = len + se->sz;
unsigned len = (se->numberOfCodeUnits() + 1) * se->sz;
e->EV.ss.Vstring = (char *)mem_malloc(len);
se->writeTo(e->EV.ss.Vstring, true);
e->EV.ss.Vstrlen = len;
e->Ety = TYnptr;
}
else
Expand Down Expand Up @@ -5596,13 +5595,18 @@ elem *toElemDtor(Expression *e, IRState *irs)

/*******************************************************
* Write read-only string to object file, create a local symbol for it.
* str[len] must be 0.
* Makes a copy of str's contents, does not keep a reference to it.
* Params:
* str = string
* len = number of code units in string
* sz = number of bytes per code unit
* Returns:
* Symbol
*/

Symbol *toStringSymbol(const char *str, size_t len, size_t sz)
{
//printf("toStringSymbol() %p\n", stringTab);
assert(str[len * sz] == 0);
StringValue *sv = stringTab->update(str, len * sz);
if (!sv->ptrvalue)
{
Expand All @@ -5619,6 +5623,28 @@ Symbol *toStringSymbol(const char *str, size_t len, size_t sz)
return (Symbol *)sv->ptrvalue;
}

/*******************************************************
 * Turn StringExp into Symbol.
 * Hands the string's code units to toStringSymbol(str, len, sz), which
 * writes a read-only string to the object file and returns its symbol.
 * Params:
 *      se = string expression to emit
 * Returns:
 *      Symbol
 */

Symbol *toStringSymbol(StringExp *se)
{
    // numberOfCodeUnits() returns size_t; keep that type so the count is not
    // narrowed through a signed int before the byte-size multiplication.
    const size_t n = se->numberOfCodeUnits();

    // Prefer the expression's own storage when it is directly accessible.
    char *p = se->toPtr();
    char *scratch = NULL;
    if (!p)
    {
        // No direct access: serialize into a temporary buffer.
        // No terminating 0 is written; only n code units are handed over.
        scratch = (char *)mem.xmalloc(n * se->sz);
        se->writeTo(scratch, false);
        p = scratch;
    }

    Symbol *si = toStringSymbol(p, n, se->sz);

    // The (str, len, sz) overload copies the contents and keeps no reference,
    // so the temporary buffer can be released right away.
    if (scratch)
        mem.xfree(scratch);
    return si;
}

/******************************************************
* Return an elem that is the file, line, and function suitable
Expand Down
48 changes: 45 additions & 3 deletions src/expression.d
Original file line number Diff line number Diff line change
Expand Up @@ -4270,11 +4270,12 @@ public:
* Returns:
* number of code units
*/
final size_t numberOfCodeUnits(int tynto)
final size_t numberOfCodeUnits(int tynto = 0)
{
int encSize;
switch (tynto)
{
case 0: return len;
case Tchar: encSize = 1; break;
case Twchar: encSize = 2; break;
case Tdchar: encSize = 4; break;
Expand Down Expand Up @@ -4323,6 +4324,48 @@ public:
return result;
}

/**********************************************
 * Copy the string's code units into a caller-supplied buffer.
 * The caller sizes the buffer with numberOfCodeUnits() (plus one
 * code unit when a terminator is requested).
 * Only the string's own encoding is handled; asking for a different
 * code unit size than `sz` trips assert(0).
 * Params:
 *      dest = destination buffer
 *      tyto = encoding type of the result, 0 meaning "same as the string"
 *      zero = append one zero code unit after the data
 */
void writeTo(void* dest, bool zero, int tyto = 0)
{
    // Code unit size requested by the caller.
    int unitSize;
    switch (tyto)
    {
        case 0:
            unitSize = sz;
            break;
        case Tchar:
            unitSize = 1;
            break;
        case Twchar:
            unitSize = 2;
            break;
        case Tdchar:
            unitSize = 4;
            break;
        default:
            assert(0);
    }

    // Converting between encodings is not implemented here.
    if (sz != unitSize)
        assert(0);

    const nbytes = len * sz;
    memcpy(dest, string, nbytes);
    if (zero)
        memset(dest + nbytes, 0, sz);   // terminator is one full code unit wide
}

/**************************************************
 * Give direct access to the string data when it is stored as
 * single-byte (char) code units.
 * Callers must not rely on a terminating 0 being present.
 * Returns:
 *      pointer to the string data when sz is 1, null otherwise
 */
char* toPtr()
{
    // Wider code units cannot be viewed as char data directly.
    if (sz != 1)
        return null;
    return cast(char*)string;
}

override StringExp toStringExp()
{
return this;
Expand Down Expand Up @@ -4448,8 +4491,7 @@ public:
{
auto nbytes = len * sz;
char* s = cast(char*)mem.xmalloc(nbytes + sz);
memcpy(s, string, nbytes);
memset(s + nbytes, 0, sz);
writeTo(s, true);
return s;
}

Expand Down
5 changes: 4 additions & 1 deletion src/expression.h
Original file line number Diff line number Diff line change
Expand Up @@ -356,9 +356,9 @@ class NullExp : public Expression

class StringExp : public Expression
{
public:
void *string; // char, wchar, or dchar data
size_t len; // number of chars, wchars, or dchars
public:
unsigned char sz; // 1: char, 2: wchar, 4: dchar
unsigned char committed; // !=0 if type is committed
utf8_t postfix; // 'c', 'w', 'd'
Expand All @@ -380,6 +380,9 @@ class StringExp : public Expression
unsigned charAt(uinteger_t i);
void accept(Visitor *v) { v->visit(this); }
char *toStringz();
size_t numberOfCodeUnits(int tynto = 0);
void writeTo(void* dest, bool zero, int tyto = 0);
char *toPtr();
};

// Tuple
Expand Down
16 changes: 14 additions & 2 deletions src/iasm.c
Original file line number Diff line number Diff line change
Expand Up @@ -3470,6 +3470,7 @@ static code *asm_db_parse(OP *pop)
{
size_t len;
unsigned char *q;
unsigned char *qstart = NULL;

if (usBytes+usSize > usMaxbytes)
{
Expand Down Expand Up @@ -3571,6 +3572,11 @@ static code *asm_db_parse(OP *pop)

usBytes += len * usSize;
}
if (qstart)
{
mem_free(qstart);
qstart = NULL;
}
break;

case TOKidentifier:
Expand Down Expand Up @@ -3606,8 +3612,14 @@ static code *asm_db_parse(OP *pop)
else if (e->op == TOKstring)
{
StringExp *se = (StringExp *)e;
q = (unsigned char *)se->string;
len = se->len;
len = se->numberOfCodeUnits();
q = (unsigned char *)se->toPtr();
if (!q)
{
qstart = (unsigned char *)mem_malloc(len * se->sz);
se->writeTo(qstart, false);
q = qstart;
}
goto L3;
}
goto Ldefault;
Expand Down
6 changes: 3 additions & 3 deletions src/s2ir.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
#include "target.h"
#include "visitor.h"

Symbol *toStringSymbol(const char *str, size_t len, size_t sz);
Symbol *toStringSymbol(StringExp *se);
elem *exp2_copytotemp(elem *e);
elem *incUsageElem(IRState *irs, Loc loc);
elem *addressElem(elem *e, Type *t, bool alwaysCopy = false);
Expand Down Expand Up @@ -564,8 +564,8 @@ class S2irVisitor : public Visitor
else
{
StringExp *se = (StringExp *)(cs->exp);
Symbol *si = toStringSymbol((char *)se->string, se->len, se->sz);
dtsize_t(&dt, se->len);
Symbol *si = toStringSymbol(se);
dtsize_t(&dt, se->numberOfCodeUnits());
dtxoff(&dt, si, 0);
}
}
Expand Down
21 changes: 15 additions & 6 deletions src/todt.c
Original file line number Diff line number Diff line change
Expand Up @@ -377,26 +377,33 @@ dt_t **Expression_toDt(Expression *e, dt_t **pdt)
Type *t = e->type->toBasetype();

// BUG: should implement some form of static string pooling
int n = e->numberOfCodeUnits();
char *p = e->toPtr();
if (!p)
{
p = (char *)mem.xmalloc(n * e->sz);
e->writeTo(p, false);
}
switch (t->ty)
{
case Tarray:
pdt = dtsize_t(pdt, e->len);
pdt = dtsize_t(pdt, n);
case Tpointer:
pdt = dtabytes(pdt, 0, e->len * e->sz, (const char *)e->string, (unsigned)e->sz);
pdt = dtabytes(pdt, 0, n * e->sz, p, (unsigned)e->sz);
break;

case Tsarray:
{
TypeSArray *tsa = (TypeSArray *)t;

pdt = dtnbytes(pdt, e->len * e->sz, (const char *)e->string);
pdt = dtnbytes(pdt, n * e->sz, p);
if (tsa->dim)
{
dinteger_t dim = tsa->dim->toInteger();
if (e->len < dim)
if (n < dim)
{
// Pad remainder with 0
pdt = dtnzeros(pdt, (dim - e->len) * tsa->next->size());
pdt = dtnzeros(pdt, (dim - n) * tsa->next->size());
}
}
break;
Expand All @@ -406,6 +413,8 @@ dt_t **Expression_toDt(Expression *e, dt_t **pdt)
printf("StringExp::toDt(type = %s)\n", e->type->toChars());
assert(0);
}
if (p != e->toPtr())
mem.xfree(p);
}

void visit(ArrayLiteralExp *e)
Expand Down Expand Up @@ -897,7 +906,7 @@ dt_t **toDtElem(TypeSArray *tsa, dt_t **pdt, Expression *e)
{
// Bugzilla 1914, 3198
if (e->op == TOKstring)
len /= ((StringExp *)e)->len;
len /= ((StringExp *)e)->numberOfCodeUnits();
else if (e->op == TOKarrayliteral)
len /= ((ArrayLiteralExp *)e)->elements->dim;
}
Expand Down
5 changes: 2 additions & 3 deletions src/toobj.c
Original file line number Diff line number Diff line change
Expand Up @@ -1098,9 +1098,8 @@ void toObjFile(Dsymbol *ds, bool multiobj)
assert(e->op == TOKstring);

StringExp *se = (StringExp *)e;
char *name = (char *)mem.xmalloc(se->len + 1);
memcpy(name, se->string, se->len);
name[se->len] = 0;
char *name = (char *)mem.xmalloc(se->numberOfCodeUnits() + 1);
se->writeTo(name, true);

/* Embed the library names into the object file.
* The linker will then automatically
Expand Down