Skip to content
This repository has been archived by the owner on Oct 12, 2022. It is now read-only.

Commit

Permalink
Make encode reusable in Phobos
Browse files Browse the repository at this point in the history
  • Loading branch information
edi33416 committed Dec 11, 2018
1 parent 3fafa7d commit 24dc693
Showing 1 changed file with 106 additions and 80 deletions.
186 changes: 106 additions & 80 deletions src/core/internal/utf.d
Original file line number Diff line number Diff line change
Expand Up @@ -425,61 +425,98 @@ dchar decode(in dchar[] s, ref size_t idx)
return c; // dummy return
}


/* =================== Encode ======================= */

/*******************************
* Encodes character c and appends it to array s[].
/**
* Encodes `c` into the static array `buf`.
*
* Params:
* buf = destination of encoded character
* c = character to encode
*
* Returns:
* The length of the encoded character (a number between `1` and `4` for
* `char[4]` buffers and a number between `1` and `2` for `wchar[2]` buffers)
* or `0` in case of failure.
*/
@safe pure nothrow
void encode(ref char[] s, dchar c)
in
@nogc nothrow pure @safe
size_t encode(out char[4] buf, dchar c)
in
{
assert(isValidDchar(c));
}
do
{
if (c <= 0x7F)
{
assert(isValidDchar(c));
buf[0] = cast(char) c;
return 1;
}
do
else if (c <= 0x7FF)
{
char[] r = s;

if (c <= 0x7F)
{
r ~= cast(char) c;
}
else
{
char[4] buf;
uint L;
buf[0] = cast(char)(0xC0 | (c >> 6));
buf[1] = cast(char)(0x80 | (c & 0x3F));
return 2;
}
else if (c <= 0xFFFF)
{
buf[0] = cast(char)(0xE0 | (c >> 12));
buf[1] = cast(char)(0x80 | ((c >> 6) & 0x3F));
buf[2] = cast(char)(0x80 | (c & 0x3F));
return 3;
}
else if (c <= 0x10FFFF)
{
buf[0] = cast(char)(0xF0 | (c >> 18));
buf[1] = cast(char)(0x80 | ((c >> 12) & 0x3F));
buf[2] = cast(char)(0x80 | ((c >> 6) & 0x3F));
buf[3] = cast(char)(0x80 | (c & 0x3F));
return 4;
}
return 0;
}

if (c <= 0x7FF)
{
buf[0] = cast(char)(0xC0 | (c >> 6));
buf[1] = cast(char)(0x80 | (c & 0x3F));
L = 2;
}
else if (c <= 0xFFFF)
{
buf[0] = cast(char)(0xE0 | (c >> 12));
buf[1] = cast(char)(0x80 | ((c >> 6) & 0x3F));
buf[2] = cast(char)(0x80 | (c & 0x3F));
L = 3;
}
else if (c <= 0x10FFFF)
{
buf[0] = cast(char)(0xF0 | (c >> 18));
buf[1] = cast(char)(0x80 | ((c >> 12) & 0x3F));
buf[2] = cast(char)(0x80 | ((c >> 6) & 0x3F));
buf[3] = cast(char)(0x80 | (c & 0x3F));
L = 4;
}
else
{
assert(0);
}
r ~= buf[0 .. L];
}
s = r;
/**
 * Encodes `c` as UTF-16 into the static array `buf`.
 *
 * Params:
 * buf = destination of the encoded character
 * c = character to encode
 *
 * Returns:
 * The number of code units stored in `buf` (`1` or `2`), or `0` when `c`
 * is greater than `0x10FFFF` and nothing is encoded.
 */
@nogc nothrow pure @safe
size_t encode(out wchar[2] buf, dchar c)
in
{
    assert(isValidDchar(c));
}
do
{
    // Beyond the last code point: report failure without encoding anything.
    if (c > 0x10FFFF)
        return 0;

    // Values up to 0xFFFF are stored directly as a single code unit.
    if (c <= 0xFFFF)
    {
        buf[0] = cast(wchar) c;
        return 1;
    }

    // Everything else is split into a high/low surrogate pair,
    // each carrying 10 bits of the value offset by 0x10000.
    immutable uint offset = c - 0x10000;
    buf[0] = cast(wchar) (0xD800 + ((offset >> 10) & 0x3FF));
    buf[1] = cast(wchar) (0xDC00 + (offset & 0x3FF));
    return 2;
}

/**
 * Appends the UTF-8 encoding of `c` to the array `s`.
 *
 * Params:
 * s = array that receives the encoded character
 * c = character to encode
 */
nothrow pure @safe
void encode(ref char[] s, dchar c)
in
{
    assert(isValidDchar(c));
}
do
{
    // Encode into a fixed-size scratch buffer first, then append only the
    // code units that were actually produced.
    char[4] encoded;
    immutable size_t count = encode(encoded, c);
    assert(count); // a count of 0 means the fixed-buffer encode failed
    s ~= encoded[0 .. count];
}

///
unittest
{
debug(utf) printf("utf.encode.unittest\n");
Expand All @@ -499,43 +536,32 @@ unittest
assert(s == "abcda\xC2\xA9\xE2\x89\xA0");
}

/** ditto */
@safe pure nothrow
/// ditto
nothrow pure @safe
void encode(ref wchar[] s, dchar c)
in
{
assert(isValidDchar(c));
}
do
{
wchar[] r = s;

if (c <= 0xFFFF)
{
r ~= cast(wchar) c;
}
else
{
wchar[2] buf;

buf[0] = cast(wchar) ((((c - 0x10000) >> 10) & 0x3FF) + 0xD800);
buf[1] = cast(wchar) (((c - 0x10000) & 0x3FF) + 0xDC00);
r ~= buf;
}
s = r;
}
in
{
assert(isValidDchar(c));
}
do
{
wchar[2] buf;
size_t L = encode(buf, c);
assert(L);
s ~= buf[0 .. L];
}

/** ditto */
@safe pure nothrow
/// ditto
nothrow pure @safe
void encode(ref dchar[] s, dchar c)
in
{
assert(isValidDchar(c));
}
do
{
s ~= c;
}
in
{
assert(isValidDchar(c));
}
do
{
s ~= c;
}

/**
Returns the code length of $(D c) in the encoding using $(D C) as a
Expand Down

0 comments on commit 24dc693

Please sign in to comment.