Skip to content
This repository has been archived by the owner on Nov 17, 2020. It is now read-only.

Commit

Permalink
Made g_utf8_to_ucs4_fast() even faster
Browse files Browse the repository at this point in the history
  • Loading branch information
mzabaluev authored and Matthias Clasen committed Jun 5, 2011
1 parent 7c801ba commit e668e6e
Showing 1 changed file with 16 additions and 29 deletions.
45 changes: 16 additions & 29 deletions glib/gutf8.c
Expand Up @@ -851,7 +851,6 @@ g_utf8_to_ucs4_fast (const gchar *str,
glong len,
glong *items_written)
{
gint j, charlen;
gunichar *result;
gint n_chars, i;
const gchar *p;
Expand Down Expand Up @@ -882,49 +881,37 @@ g_utf8_to_ucs4_fast (const gchar *str,
p = str;
for (i=0; i < n_chars; i++)
{
gunichar wc = ((unsigned char *)p)[0];
gunichar wc = (guchar)*p++;

if (wc < 0x80)
{
result[i] = wc;
p++;
}
else
{
if (wc < 0xe0)
{
charlen = 2;
wc &= 0x1f;
}
else if (wc < 0xf0)
{
charlen = 3;
wc &= 0x0f;
}
else if (wc < 0xf8)
{
charlen = 4;
wc &= 0x07;
}
else if (wc < 0xfc)
{
charlen = 5;
wc &= 0x03;
}
else
gunichar mask = 0x40;

if (G_UNLIKELY ((wc & mask) == 0))
{
charlen = 6;
wc &= 0x01;
/* It's an out-of-sequence 10xxxxxx byte.
* Rather than making an ugly hash of this and the next byte
* and overrunning the buffer, it's more useful to treat it
* with a replacement character */
result[i] = 0xfffd;
continue;
}

for (j = 1; j < charlen; j++)
do
{
wc <<= 6;
wc |= ((unsigned char *)p)[j] & 0x3f;
wc |= (guchar)(*p++) & 0x3f;
mask <<= 5;
}
while((wc & mask) != 0);

wc &= mask - 1;

result[i] = wc;
p += charlen;
}
}
result[i] = 0;
Expand Down

0 comments on commit e668e6e

Please sign in to comment.