Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

aliviate some of the nastiness of the UTF-8 support patch.

reduce repeated code and encapsulate UTF8 specific behavior to where it is used.
  • Loading branch information...
commit 4a18616f899580ade7807e2150afcd06a8b1de4d 1 parent ae053a6
@bradallred bradallred authored
Showing with 59 additions and 81 deletions.
  1. +59 −79 gemrb/core/Font.cpp
  2. +0 −2  gemrb/core/Font.h
View
138 gemrb/core/Font.cpp
@@ -575,22 +575,70 @@ void Font::SetupString(ieWord* string, unsigned int width, bool NoColor, Font *i
size_t Font::GetDoubleByteString(const unsigned char* string, ieWord* &dbString) const
{
- if (utf8)
- {
- return GetUtf8String(string, dbString);
- }
size_t len = strlen((char*)string);
dbString = (ieWord*)malloc((len+1) * sizeof(ieWord));
size_t dbLen = 0;
for(size_t i=0; i<len; ++i)
{
- // we are assuming that every multibyte encoding uses single bytes for chars 32 - 127
- if( multibyte && (i+1 < len) && (string[i] >= 128 || string[i] < 32)) { // this is a double byte char
- dbString[dbLen] = (string[i+1] << 8) + string[i];
- ++i;
- } else
- dbString[dbLen] = string[i];
- assert(dbString[dbLen] != 0);
+ if (utf8) {
+ /* The first byte of a UTF-8 encoding reveals its length. */
+ static const unsigned char utf8_bytes[0x100] = {
+ /* 00-7f are themselves */
+ /*00*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /*10*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /*20*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /*30*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /*40*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /*50*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /*60*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /*70*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /* 80-bf are later bytes, out-of-sync if first */
+ /*80*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /*90*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /*a0*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /*b0*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /* c0-df are first byte of two-byte sequences (5+6=11 bits) */
+ /* c0-c1 are noncanonical */
+ /*c0*/ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ /*d0*/ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ /* e0-ef are first byte of three-byte (4+6+6=16 bits) */
+ /* e0 80-9f are noncanonical */
+ /*e0*/ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ /* f0-f7 are first byte of four-byte (3+6+6+6=21 bits) */
+ /* f0 80-8f are noncanonical */
+ /*f0*/ 4, 4, 4, 4, 4, 4, 4, 4,
+ /* f8-fb are first byte of five-byte (2+6+6+6+6=26 bits) */
+ /* f8 80-87 are noncanonical */
+ /*f8*/ 5, 5, 5, 5,
+ /* fc-fd are first byte of six-byte (1+6+6+6+6+6=31 bits) */
+ /* fc 80-83 are noncanonical */
+ /*fc*/ 6, 6,
+ /* fe and ff are not part of valid UTF-8 so they stand alone */
+ /*fe*/ 1, 1
+ };
+
+ size_t nb = utf8_bytes[*string];
+
+ i += nb;
+ if (nb <= 1 || nb > 6) {
+ dbString[dbLen] = *string;
+ } else {
+ ieWord ch = *string & ((1 << (7 - nb)) - 1);
+ while (--nb)
+ ch <<= 6, ch |= *++string & 0x3f;
+
+ dbString[dbLen] = ch;
+ }
+ } else {
+ // we are assuming that every multibyte encoding uses single bytes for chars 32 - 127
+ if( multibyte && (i+1 < len) && (string[i] >= 128 || string[i] < 32)) { // this is a double byte char
+ dbString[dbLen] = (string[i+1] << 8) + string[i];
+ ++i;
+ } else {
+ dbString[dbLen] = string[i];
+ }
+ }
+ assert(dbString[dbLen] != 0); // premature end of string
++dbLen;
}
dbString[dbLen] = '\0';
@@ -636,72 +684,4 @@ int Font::dbStrLen(const ieWord* string) const
return count;
}
-/* The first byte of a UTF-8 encoding reveals its length. */
-unsigned char utf8_bytes[0x100] = {
- /* 00-7f are themselves */
-/*00*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-/*10*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-/*20*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-/*30*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-/*40*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-/*50*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-/*60*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-/*70*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- /* 80-bf are later bytes, out-of-sync if first */
-/*80*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-/*90*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-/*a0*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-/*b0*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- /* c0-df are first byte of two-byte sequences (5+6=11 bits) */
- /* c0-c1 are noncanonical */
-/*c0*/ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-/*d0*/ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- /* e0-ef are first byte of three-byte (4+6+6=16 bits) */
- /* e0 80-9f are noncanonical */
-/*e0*/ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- /* f0-f7 are first byte of four-byte (3+6+6+6=21 bits) */
- /* f0 80-8f are noncanonical */
-/*f0*/ 4, 4, 4, 4, 4, 4, 4, 4,
- /* f8-fb are first byte of five-byte (2+6+6+6+6=26 bits) */
- /* f8 80-87 are noncanonical */
-/*f8*/ 5, 5, 5, 5,
- /* fc-fd are first byte of six-byte (1+6+6+6+6+6=31 bits) */
- /* fc 80-83 are noncanonical */
-/*fc*/ 6, 6,
- /* fe and ff are not part of valid UTF-8 so they stand alone */
-/*fe*/ 1, 1
-};
-
-ieWord Font::readUtf8(const unsigned char *src, size_t *readed_length) const
-{
- size_t nb = utf8_bytes[*src];
-
- *readed_length = nb;
- if (nb <= 1 || nb > 6)
- return *src;
- ieWord ch = *src & ((1 << (7 - nb)) - 1);
- while (--nb)
- ch <<= 6, ch |= *++src & 0x3f;
-
- return ch;
-}
-
-size_t Font::GetUtf8String(const unsigned char* utf8String, ieWord* &utf16String) const
-{
- size_t utf8Len = strlen((char*)utf8String);
- utf16String = (ieWord*)malloc((utf8Len+1) * sizeof(ieWord));
- size_t utf16Len = 0;
- while (utf8Len > 0)
- {
- size_t len;
- utf16String[utf16Len] = readUtf8(utf8String, &len);
- utf8Len -= len;
- utf8String += len;
- utf16Len++;
- }
- utf16String[utf16Len] = '\0';
- utf16String = (ieWord*)realloc(utf16String, (utf16Len+1) * sizeof(ieWord));
- return utf16Len;
-}
-
}
View
2  gemrb/core/Font.h
@@ -111,7 +111,6 @@ class GEM_EXPORT Font {
int CalcStringWidth(const unsigned char* string, bool NoColor = false) const;
void SetupString(ieWord* string, unsigned int width, bool NoColor = false, Font *initials = NULL, bool enablecap = false) const;
size_t GetDoubleByteString(const unsigned char* string, ieWord* &dbString) const;
- size_t GetUtf8String(const unsigned char* string, ieWord* &uniString) const;
protected:
virtual int GetKerningOffset(ieWord /*leftChr*/, ieWord /*rightChr*/) const {return 0;};
@@ -120,7 +119,6 @@ class GEM_EXPORT Font {
int CalcStringWidth(const ieWord* string, bool NoColor = false) const;
int CalcStringHeight(const ieWord* string, unsigned int len, bool NoColor) const;
int dbStrLen(const ieWord* string) const;
- ieWord readUtf8(const unsigned char *src, size_t *readed_length) const;
};
}
Please sign in to comment.
Something went wrong with that request. Please try again.