Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
...
  • 3 commits
  • 8 files changed
  • 0 commit comments
  • 2 contributors
Commits on May 26, 2013
@noword noword added tlk_convert, a python dialog.tlk converter to utf8
currently used for conversion of chinese GBK tlk to a utf8 one to enable
the use of regular fonts

Signed-off-by: Jaka Kranjc <lynxlupodian@users.sourceforge.net>
9b1fb5c
@noword noword added utf8-encoded tlk support 9279f41
@lynxlynxlynx lynxlynxlynx font: moved the zerospace detection from a config option to a fonts.2da column
cd1f81f
View
79 gemrb/core/Font.cpp
@@ -42,7 +42,8 @@ Font::Font()
{
name[0] = '\0';
multibyte = false;
-
+ utf8 = false;
+ zeroSpace = false;
// TODO: list incomplete
// maybe want to externalize this
// list compiled form wiki: http://www.gemrb.org/wiki/doku.php?id=engine:encodings
@@ -58,6 +59,10 @@ Font::Font()
const char* encoding = core->TLKEncoding.c_str();
for (size_t i = 0; i < listSize; i++) {
+ if (stricmp(encoding, "UTF-8") == 0) {
+ utf8 = true;
+ break;
+ }
if (stricmp(encoding, multibyteEncodings[i]) == 0) {
multibyte = true;
break;
@@ -597,6 +602,10 @@ void Font::SetupString(ieWord* string, unsigned int width, bool NoColor, Font *i
size_t Font::GetDoubleByteString(const unsigned char* string, ieWord* &dbString) const
{
+ if (utf8)
+ {
+ return GetUtf8String(string, dbString);
+ }
size_t len = strlen((char*)string);
dbString = (ieWord*)malloc((len+1) * sizeof(ieWord));
size_t dbLen = 0;
@@ -654,5 +663,73 @@ int Font::dbStrLen(const ieWord* string) const
return count;
}
+/*
+ * Sequence length implied by the first byte of a UTF-8 encoding.
+ * The table is only read, and only from this file, so it is kept
+ * constant and file-local.
+ */
+static const unsigned char utf8_bytes[0x100] = {
+	/* 00-7f are themselves */
+/*00*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+/*10*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+/*20*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+/*30*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+/*40*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+/*50*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+/*60*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+/*70*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	/* 80-bf are later bytes, out-of-sync if first */
+/*80*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+/*90*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+/*a0*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+/*b0*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	/* c0-df are first byte of two-byte sequences (5+6=11 bits) */
+	/* c0-c1 are noncanonical */
+/*c0*/ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+/*d0*/ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+	/* e0-ef are first byte of three-byte (4+6+6=16 bits) */
+	/* e0 80-9f are noncanonical */
+/*e0*/ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+	/* f0-f7 are first byte of four-byte (3+6+6+6=21 bits) */
+	/* f0 80-8f are noncanonical */
+/*f0*/ 4, 4, 4, 4, 4, 4, 4, 4,
+	/* f8-fb are first byte of five-byte (2+6+6+6+6=26 bits) */
+	/* f8 80-87 are noncanonical */
+/*f8*/ 5, 5, 5, 5,
+	/* fc-fd are first byte of six-byte (1+6+6+6+6+6=31 bits) */
+	/* fc 80-83 are noncanonical */
+/*fc*/ 6, 6,
+	/* fe and ff are not part of valid UTF-8 so they stand alone */
+/*fe*/ 1, 1
+};
+
+/**
+ * Decodes the single UTF-8 sequence that starts at src.
+ *
+ * @param src            points at the first byte of the sequence; the
+ *                       buffer must be NUL-terminated.
+ * @param readed_length  receives the number of bytes consumed (always >= 1).
+ * @return the decoded value. A byte that does not start a multi-byte
+ *         sequence, or a lead byte whose continuation bytes are missing
+ *         or malformed, is returned unchanged with a length of 1.
+ *
+ * Continuation bytes are validated before they are consumed. The NUL
+ * terminator is not a continuation byte, so a truncated sequence at the
+ * end of the string never causes a read past the terminator and the
+ * reported length never exceeds the bytes actually present.
+ *
+ * NOTE(review): values that need more bits than ieWord holds are truncated
+ * by the assignment below - confirm whether a replacement glyph is wanted.
+ */
+ieWord Font::readUtf8(const unsigned char *src, size_t *readed_length) const
+{
+	const size_t expected = utf8_bytes[*src];
+
+	// a malformed sequence consumes just its lead byte
+	*readed_length = 1;
+	if (expected <= 1)
+		return *src;
+
+	// the lead byte of an n-byte sequence carries 7-n payload bits
+	ieWord ch = *src & ((1 << (7 - expected)) - 1);
+	for (size_t i = 1; i < expected; i++) {
+		const unsigned char next = src[i];
+		if ((next & 0xc0) != 0x80) {
+			// not a continuation byte (possibly the terminator)
+			return *src;
+		}
+		ch <<= 6;
+		ch |= next & 0x3f;
+	}
+
+	*readed_length = expected;
+	return ch;
+}
+
+/**
+ * Converts a NUL-terminated UTF-8 string into a zero-terminated ieWord
+ * array holding one element per decoded sequence.
+ *
+ * @param utf8String   NUL-terminated input bytes.
+ * @param utf16String  receives a malloc'ed buffer, or NULL if the
+ *                     allocation fails. It is not freed here; presumably
+ *                     the caller releases it with free() - confirm.
+ * @return the number of elements written, not counting the terminator
+ *         (0 if the allocation fails).
+ */
+size_t Font::GetUtf8String(const unsigned char* utf8String, ieWord* &utf16String) const
+{
+	size_t utf8Len = strlen((const char*)utf8String);
+	// every sequence consumes at least one byte, so utf8Len+1 elements
+	// are always enough for the result plus its terminator
+	utf16String = (ieWord*)malloc((utf8Len+1) * sizeof(ieWord));
+	if (!utf16String) {
+		return 0;
+	}
+
+	size_t utf16Len = 0;
+	while (utf8Len > 0)
+	{
+		size_t len = 0;
+		utf16String[utf16Len] = readUtf8(utf8String, &len);
+		// A truncated trailing sequence may report more bytes than remain;
+		// clamp so the unsigned counter cannot wrap and run the loop
+		// past the end of the input.
+		if (len > utf8Len) {
+			len = utf8Len;
+		}
+		utf8Len -= len;
+		utf8String += len;
+		utf16Len++;
+	}
+	utf16String[utf16Len] = 0;
+
+	// Shrinking is only an optimisation: if realloc fails the original
+	// buffer is still valid, so keep it instead of losing the pointer.
+	ieWord* shrunk = (ieWord*)realloc(utf16String, (utf16Len+1) * sizeof(ieWord));
+	if (shrunk) {
+		utf16String = shrunk;
+	}
+	return utf16Len;
+}
+
#undef SET_BLIT_PALETTE
}
View
5 gemrb/core/Font.h
@@ -69,6 +69,8 @@ class GEM_EXPORT Font {
Sprite2D* blank;
bool multibyte;
+ bool utf8;
+ bool zeroSpace;
public:
int maxHeight;
@@ -88,6 +90,7 @@ class GEM_EXPORT Font {
virtual ieWord GetPointSize() const {return 0;};
virtual FontStyle GetStyle() const {return NORMAL;};
+ void SetIgnoreSpaceWidth(bool ignore) { zeroSpace = ignore; };
Palette* GetPalette() const;
void SetPalette(Palette* pal);
@@ -110,6 +113,7 @@ class GEM_EXPORT Font {
int CalcStringWidth(const unsigned char* string, bool NoColor = false) const;
void SetupString(ieWord* string, unsigned int width, bool NoColor = false, Font *initials = NULL, bool enablecap = false) const;
size_t GetDoubleByteString(const unsigned char* string, ieWord* &dbString) const;
+ size_t GetUtf8String(const unsigned char* string, ieWord* &uniString) const;
protected:
virtual int GetKerningOffset(ieWord /*leftChr*/, ieWord /*rightChr*/) const {return 0;};
@@ -118,6 +122,7 @@ class GEM_EXPORT Font {
int CalcStringWidth(const ieWord* string, bool NoColor = false) const;
int CalcStringHeight(const ieWord* string, unsigned int len, bool NoColor) const;
int dbStrLen(const ieWord* string) const;
+ ieWord readUtf8(const unsigned char *src, size_t *readed_length) const;
};
}
View
4 gemrb/core/Interface.cpp
@@ -1350,11 +1350,13 @@ int Interface::LoadSprites()
const char* font_name;
unsigned short font_size = 0;
FontStyle font_style = NORMAL;
+ bool zero_space = false;
if (CustomFontPath[0]) {
font_name = tab->QueryField( rowName, "FONT_NAME" );// map a font alternative to the BAM ResRef since CHUs contain hardcoded refrences.
font_size = atoi( tab->QueryField( rowName, "PT_SIZE" ) );// not available in BAM fonts.
font_style = (FontStyle)atoi( tab->QueryField( rowName, "STYLE" ) );// not available in BAM fonts.
+ zero_space = (bool)atoi( tab->QueryField( rowName, "ZEROSPACE" ) );
}else{
font_name = ResRef;
}
@@ -1411,6 +1413,7 @@ int Interface::LoadSprites()
fnt->AddResRef(ResRef);
fnt->SetName(font_name);
+ fnt->SetIgnoreSpaceWidth(zero_space);
fonts.push_back(fnt);
}
@@ -1518,6 +1521,7 @@ int Interface::Init(InterfaceConfig* config)
CONFIG_INT("NumFingKboard", NumFingKboard = );
CONFIG_INT("NumFingInfo", NumFingInfo = );
CONFIG_INT("MouseFeedback", MouseFeedback = );
+
#undef CONFIG_INT
#define CONFIG_STRING(key, var, default) \
View
1  gemrb/core/Interface.h
@@ -794,6 +794,7 @@ class GEM_EXPORT Interface
int GUIEnhancements;
bool KeepCache;
bool MultipleQuickSaves;
+
Variables *plugin_flags;
/** The Main program loop */
void Main(void);
View
5 gemrb/plugins/TTFImporter/TTFFont.cpp
@@ -39,6 +39,7 @@ namespace GemRB {
const Sprite2D* TTFFont::GetCharSprite(ieWord chr) const
{
#if HAVE_ICONV
+ if (!utf8) {
char* oldchar = (char*)&chr;
ieWord unicodeChr = 0;
char* newchar = (char*)&unicodeChr;
@@ -57,6 +58,7 @@ const Sprite2D* TTFFont::GetCharSprite(ieWord chr) const
}
iconv_close(cd);
chr = unicodeChr;
+ }
#endif
const Holder<Sprite2D>* sprCache = glyphCache->get(chr);
if (sprCache) {
@@ -266,7 +268,8 @@ TTFFont::TTFFont(FT_Face face, ieWord ptSize, FontStyle style, Palette* pal)
// TODO: ttf fonts have a "box" glyph they use for this
blank = core->GetVideoDriver()->CreateSprite8(0, 0, 8, NULL, palette->col);
// ttf fonts dont produce glyphs for whitespace
- Sprite2D* space = core->GetVideoDriver()->CreateSprite8((int)(ptSize * 0.25), 0, 8, NULL, palette->col);;
+ int SpaceWidth = zeroSpace? 1 : (ptSize * 0.25);
+ Sprite2D* space = core->GetVideoDriver()->CreateSprite8(SpaceWidth, 0, 8, NULL, palette->col);;
Sprite2D* tab = core->GetVideoDriver()->CreateSprite8((space->Width)*4, 0, 8, NULL, palette->col);
// now cache these glyphs for quick access
View
33 tools/tlk_convert/base.py
@@ -0,0 +1,33 @@
+#! /usr/bin/env python
+#coding=utf-8
+import os
+
+class Base:
+ SIGN = ""
+ def __init__(self, io=None):
+ if io:
+ self.load(io)
+
+ def load(self, io):
+ if io.read(len(self.SIGN)) != self.SIGN:
+ raise TypeError
+
+ self._load(io)
+
+ def _load(self, io):
+ raise NotImplementedError
+
+ def save(self, io):
+ io.write(self.SIGN)
+ self._save(io)
+
+ def _save(self, io):
+ raise NotImplementedError
+
+def BaseFactory(io, class_list):
+ pos = io.tell()
+ for c in class_list:
+ sign = io.read(len(c.SIGN))
+ io.seek(pos, os.SEEK_SET)
+ if sign == c.SIGN:
+ return c(io)
View
49 tools/tlk_convert/tlk.py
@@ -0,0 +1,49 @@
+#! /usr/bin/env python
+#coding=utf-8
+from base import Base
+from struct import unpack, pack
+import os
+import cStringIO
+#http://gemrb.org/iesdp/file_formats/ie_formats/tlk_v1.htm
+
+class Tlk(Base, list):
+ SIGN = "TLK V1 "
+ def _load(self, io):
+ self.language_id, num, offset = unpack("<HII", io.read(0xa))
+
+ for i in range(num):
+ self.append(dict(zip(("flag", "sound_name", "volume", "pitch", "offset", "length"),
+ unpack("<H 8s 4I", io.read(0x1a)))))
+
+ for t in self:
+ io.seek(offset+t["offset"], os.SEEK_SET)
+ t["string"] = io.read(t["length"])
+
+ def _save(self, io):
+ offset = len(self)*0x1a + 0x12
+ io.write(pack("<HII", self.language_id, len(self), offset))
+
+ string_io = cStringIO.StringIO()
+ for t in self:
+ t["length"] = len(t["string"])
+ if t["length"] == 0:
+ t["offset"] = 0
+ else:
+ t["offset"] = string_io.tell()
+
+ io.write(pack("<H 8s 4I", t["flag"], t["sound_name"], t["volume"], t["pitch"], t["offset"], t["length"]))
+ string_io.write(t["string"])
+
+ io.write(string_io.getvalue())
+
+ def __str__(self):
+ s = []
+ for i, t in enumerate(self):
+ s.append("%d %04x %8s %08x %08x %08x %08x %s"%(i, t["flag"], t["sound_name"].strip("\x00"), t["volume"], t["pitch"], t["offset"], t["length"], t["string"]))
+ return "\n".join(s)
+
+if __name__ == "__main__":
+ import sys
+ t = Tlk(open(sys.argv[1], "rb"))
+ print t
+ #t.save(open("1.bin", "wb"))
View
56 tools/tlk_convert/tlk_convert.py
@@ -0,0 +1,56 @@
+#! /usr/bin/env python
+#coding=utf-8
+from tlk import Tlk
+
+PUNCTUATIONS = u",。!“”-…,.!"
+
+def insert_space(utf16_str, interval=1, codec = None):
+ if codec:
+ utf16_str = utf16_str.decode(codec)
+
+ utf16_str = utf16_str.replace(u" ", u" ")
+ words = []
+ word = u""
+ for i, u in enumerate(utf16_str):
+ word += u
+ if ord(u) > 0x100 \
+ and len(word) >= interval \
+ and (i+1 < len(utf16_str) and utf16_str[i+1] not in PUNCTUATIONS):
+ words.append(word)
+ word = u""
+ if len(word) > 0:
+ words.append(word)
+ s = u" ".join(words)
+ if codec:
+ s = s.encode(codec)
+ return s
+
+def convert_to_utf8(tlk_name, codec = "GBK", need_space = True):
+ tlk = Tlk(open(tlk_name, "rb"))
+ tlk.save(open(tlk_name+".bak", "wb"))
+ for i, t in enumerate(tlk):
+ try:
+ txt = t["string"].decode(codec).encode("utf-8")
+ except:
+ print "Warning: ", i
+ continue
+ if need_space:
+ txt = insert_space(txt, codec="utf-8")
+ t["string"] = txt
+ tlk.save(open(tlk_name, "wb"))
+
+if __name__ == "__main__":
+ import argparse
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("name", action="store", nargs = 1)
+ parser.add_argument("codec", action="store", nargs = "?")
+ parser.add_argument("--disable_space", action="store_true", default = False)
+
+ args = parser.parse_args()
+ codec = args.codec
+ if not codec:
+ codec = "GBK"
+
+ convert_to_utf8(args.name[0], codec, not args.disable_space)
+

No commit comments for this range

Something went wrong with that request. Please try again.