Added tlk_convert, a Python tool that converts dialog.tlk files to UTF-8

Currently used to convert a Chinese GBK-encoded tlk to a UTF-8 one, which enables the use of regular fonts.

Signed-off-by: Jaka Kranjc <lynxlupodian@users.sourceforge.net>
gemrb · May 26, 2013 · 9b1fb5c · 9b1fb5c
1 parent fe0c1cf
commit 9b1fb5c
Show file tree

Hide file tree

Showing 3 changed files with 138 additions and 0 deletions.
diff --git a/tools/tlk_convert/base.py b/tools/tlk_convert/base.py
@@ -0,0 +1,33 @@
+#! /usr/bin/env python
+#coding=utf-8
+import os
+
+class Base:
+    """Common base for binary file formats that start with a signature.
+
+    Subclasses set ``SIGN`` to the expected signature and implement
+    ``_load``/``_save`` for the data that follows it.
+    """
+
+    # Signature expected at the start of the stream; overridden by subclasses.
+    SIGN = ""
+
+    def __init__(self, io=None):
+        """Create the object, parsing it from ``io`` when a stream is given."""
+        # Compare against None explicitly so that a stream object which
+        # happens to evaluate as false is still loaded.
+        if io is not None:
+            self.load(io)
+
+    def load(self, io):
+        """Verify the signature at the current position of ``io``, then parse.
+
+        Raises TypeError when the stream does not start with ``SIGN``.
+        """
+        sign = io.read(len(self.SIGN))
+        if sign != self.SIGN:
+            # Say what was expected and what was found so that a wrong or
+            # truncated input file can be diagnosed from the traceback.
+            raise TypeError("bad signature: expected %r, got %r"
+                            % (self.SIGN, sign))
+
+        self._load(io)
+
+    def _load(self, io):
+        """Parse everything after the signature; subclasses must override."""
+        raise NotImplementedError
+
+    def save(self, io):
+        """Write ``SIGN`` followed by the subclass-specific data to ``io``."""
+        io.write(self.SIGN)
+        self._save(io)
+
+    def _save(self, io):
+        """Write everything after the signature; subclasses must override."""
+        raise NotImplementedError
+
+def BaseFactory(io, class_list):
+    """Instantiate the first class in ``class_list`` whose ``SIGN`` matches.
+
+    The stream is rewound to its starting position after every probe, so the
+    chosen class is constructed with the signature still unread.  Returns
+    None when no class matches.
+    """
+    start = io.tell()
+    for candidate in class_list:
+        expected = candidate.SIGN
+        found = io.read(len(expected))
+        # Undo the probe read so the next candidate (or the constructor)
+        # sees the stream from the same place.
+        io.seek(start, os.SEEK_SET)
+        if found != expected:
+            continue
+        return candidate(io)
+    return None
diff --git a/tools/tlk_convert/tlk.py b/tools/tlk_convert/tlk.py
@@ -0,0 +1,49 @@
+#! /usr/bin/env python
+#coding=utf-8
+from base import Base
+from struct import unpack, pack
+import os
+import cStringIO
+#http://gemrb.org/iesdp/file_formats/ie_formats/tlk_v1.htm
+
+class Tlk(Base, list):
+    """In-memory TLK V1 string table: a list of per-string entry dicts.
+
+    Each entry carries the keys "flag", "sound_name", "volume", "pitch",
+    "offset", "length" and "string".
+    """
+
+    SIGN = "TLK V1  "
+
+    def _load(self, io):
+        """Read the header, the entry table and every entry's string."""
+        header = io.read(0xa)
+        self.language_id, count, strings_start = unpack("<HII", header)
+
+        keys = ("flag", "sound_name", "volume", "pitch", "offset", "length")
+        for _ in range(count):
+            values = unpack("<H 8s 4I", io.read(0x1a))
+            self.append(dict(zip(keys, values)))
+
+        # Entry offsets are relative to the string area named in the header.
+        for entry in self:
+            io.seek(strings_start + entry["offset"], os.SEEK_SET)
+            entry["string"] = io.read(entry["length"])
+
+    def _save(self, io):
+        """Write the header, the entry table and the string data to ``io``.
+
+        Every entry's "length" and "offset" are recomputed from its "string"
+        and stored back into the entry.
+        """
+        # 0x12 bytes of signature plus header, then 0x1a bytes per entry.
+        strings_start = 0x12 + 0x1a * len(self)
+        io.write(pack("<HII", self.language_id, len(self), strings_start))
+
+        strings = cStringIO.StringIO()
+        for entry in self:
+            text = entry["string"]
+            entry["length"] = len(text)
+            # Empty strings are recorded at offset 0.
+            entry["offset"] = strings.tell() if entry["length"] != 0 else 0
+
+            record = pack("<H 8s 4I", entry["flag"], entry["sound_name"],
+                          entry["volume"], entry["pitch"],
+                          entry["offset"], entry["length"])
+            io.write(record)
+            strings.write(text)
+
+        io.write(strings.getvalue())
+
+    def __str__(self):
+        """Return one line per entry: index, header fields and the string."""
+        lines = []
+        for index, entry in enumerate(self):
+            fields = (index, entry["flag"], entry["sound_name"].strip("\x00"),
+                      entry["volume"], entry["pitch"], entry["offset"],
+                      entry["length"], entry["string"])
+            lines.append("%d %04x %8s %08x %08x %08x %08x %s" % fields)
+        return "\n".join(lines)
+
+if __name__ == "__main__":
+    # Command-line use: print every entry of the TLK file named by the
+    # first argument.
+    import sys
+    # Close the input file deterministically instead of leaking the handle.
+    with open(sys.argv[1], "rb") as tlk_file:
+        t = Tlk(tlk_file)
+    print t
+    #t.save(open("1.bin", "wb"))
diff --git a/tools/tlk_convert/tlk_convert.py b/tools/tlk_convert/tlk_convert.py
@@ -0,0 +1,56 @@
+#! /usr/bin/env python
+#coding=utf-8
+from tlk import Tlk
+
+# Characters in front of which insert_space() never places a separating
+# space, so they stay attached to the wide character preceding them.
+PUNCTUATIONS = u"，。！“”－…,.!"
+
+def insert_space(utf16_str, interval=1, codec = None):
+    """Break runs of wide characters into space-separated groups.
+
+    ASCII spaces already in the text are first turned into ideographic
+    spaces.  A group is closed after a character whose code point is above
+    0x100 once the group holds at least ``interval`` characters, unless that
+    character is the last one or is followed by a character from
+    PUNCTUATIONS.  The groups are then joined with single ASCII spaces.
+
+    When ``codec`` is given, the input is decoded with it first and the
+    result is encoded with it again before being returned.
+    """
+    text = utf16_str.decode(codec) if codec else utf16_str
+    text = text.replace(u" ", u"　")
+
+    last = len(text) - 1
+    groups = []
+    current = u""
+    for pos, char in enumerate(text):
+        current += char
+        if ord(char) <= 0x100 or len(current) < interval:
+            continue
+        # Never break after the final character or in front of punctuation.
+        if pos >= last or text[pos + 1] in PUNCTUATIONS:
+            continue
+        groups.append(current)
+        current = u""
+    if current:
+        groups.append(current)
+
+    joined = u" ".join(groups)
+    return joined.encode(codec) if codec else joined
+
+def convert_to_utf8(tlk_name, codec = "GBK", need_space = True):
+    """Re-encode every string of the TLK file ``tlk_name`` to UTF-8, in place.
+
+    The parsed table is first written back out to ``tlk_name + ".bak"``.
+    Strings that cannot be decoded with ``codec`` are reported by index and
+    kept unchanged.  When ``need_space`` is true, each converted string is
+    additionally passed through insert_space().
+    """
+    # Read and close the input before the same path is reopened for writing.
+    with open(tlk_name, "rb") as src:
+        tlk = Tlk(src)
+    with open(tlk_name + ".bak", "wb") as backup:
+        tlk.save(backup)
+
+    for i, t in enumerate(tlk):
+        try:
+            txt = t["string"].decode(codec).encode("utf-8")
+        except UnicodeError:
+            # Only undecodable text is skipped; anything else (an unknown
+            # codec name, Ctrl-C, ...) propagates instead of being hidden.
+            print "Warning: ", i
+            continue
+        if need_space:
+            txt = insert_space(txt, codec="utf-8")
+        t["string"] = txt
+
+    # The context manager guarantees the rewritten file is flushed and closed.
+    with open(tlk_name, "wb") as out:
+        tlk.save(out)
+
+if __name__ == "__main__":
+    # Command line: name [codec] [--disable_space]
+    import argparse
+
+    arg_parser = argparse.ArgumentParser()
+    arg_parser.add_argument("name", nargs=1)
+    arg_parser.add_argument("codec", nargs="?")
+    arg_parser.add_argument("--disable_space", action="store_true")
+    options = arg_parser.parse_args()
+
+    # Fall back to GBK when the codec argument is missing or empty.
+    convert_to_utf8(options.name[0],
+                    options.codec or "GBK",
+                    not options.disable_space)
+