Navigation Menu

Skip to content

Commit

Permalink
Added tlk_convert, a Python tool that converts dialog.tlk to UTF-8
Browse files Browse the repository at this point in the history
Currently used to convert a Chinese GBK-encoded tlk into a UTF-8 one, which enables
the use of regular fonts.

Signed-off-by: Jaka Kranjc <lynxlupodian@users.sourceforge.net>
  • Loading branch information
noword authored and lynxlynxlynx committed May 26, 2013
1 parent fe0c1cf commit 9b1fb5c
Show file tree
Hide file tree
Showing 3 changed files with 138 additions and 0 deletions.
33 changes: 33 additions & 0 deletions tools/tlk_convert/base.py
@@ -0,0 +1,33 @@
#! /usr/bin/env python
#coding=utf-8
import os

class Base(object):
    """Load/save skeleton for formats that begin with a fixed signature.

    Subclasses set ``SIGN`` to the signature found at the start of the data
    and implement ``_load`` / ``_save`` for everything that follows it.
    """

    # Signature expected at the start of the stream. The empty default
    # matches any stream, because zero characters are read and compared.
    SIGN = ""

    def __init__(self, io=None):
        """Create the object and, when *io* is given, load from it at once."""
        # Test against None explicitly: a stream object whose truth value is
        # false (e.g. one defining __len__ that returns 0) must still be read.
        if io is not None:
            self.load(io)

    def load(self, io):
        """Verify the signature at the current position, then parse the body.

        Raises:
            TypeError: if the stream does not start with ``SIGN``.
        """
        found = io.read(len(self.SIGN))
        if found != self.SIGN:
            raise TypeError(
                "bad signature: expected %r, got %r" % (self.SIGN, found))

        self._load(io)

    def _load(self, io):
        """Parse everything after the signature; subclasses must override."""
        raise NotImplementedError

    def save(self, io):
        """Write the signature followed by the subclass-specific body."""
        io.write(self.SIGN)
        self._save(io)

    def _save(self, io):
        """Write everything after the signature; subclasses must override."""
        raise NotImplementedError

def BaseFactory(io, class_list):
    """Build an instance of the first class whose ``SIGN`` matches the stream.

    For each candidate in *class_list* the signature-sized prefix is read
    from the current position and the stream is rewound to that position.
    On a match the candidate is called with the rewound stream and the
    result is returned. Returns None when no candidate matches.
    """
    start = io.tell()
    for candidate in class_list:
        prefix = io.read(len(candidate.SIGN))
        # Always rewind, so the next probe (or the chosen class) starts from
        # the same position the factory was called at.
        io.seek(start, os.SEEK_SET)
        if prefix != candidate.SIGN:
            continue
        return candidate(io)
    return None
49 changes: 49 additions & 0 deletions tools/tlk_convert/tlk.py
@@ -0,0 +1,49 @@
#! /usr/bin/env python
#coding=utf-8
from base import Base
from struct import unpack, pack
import os
import cStringIO
#http://gemrb.org/iesdp/file_formats/ie_formats/tlk_v1.htm

class Tlk(Base, list):
    """A TLK V1 string table, held as a list of entry dicts.

    Every entry carries the raw on-disk fields ``flag``, ``sound_name``,
    ``volume``, ``pitch``, ``offset`` and ``length``, plus ``string``: the
    text exactly as read from the file (still encoded). The header's
    language id is kept in ``language_id``.
    """

    # Must be exactly 8 bytes: together with the 0xa bytes of header fields
    # read in _load() it makes up the 0x12-byte header that _save() relies on
    # when it computes where the string data starts.
    SIGN = b"TLK V1  "

    def _load(self, io):
        """Read the header fields, the entry table and then each string."""
        self.language_id, count, strings_start = unpack("<HII", io.read(0xa))

        keys = ("flag", "sound_name", "volume", "pitch", "offset", "length")
        for _ in range(count):
            # Each entry record is 0x1a bytes: H + 8s + 4I.
            self.append(dict(zip(keys, unpack("<H 8s 4I", io.read(0x1a)))))

        for t in self:
            # Entry offsets are relative to the start of the string data.
            io.seek(strings_start + t["offset"], os.SEEK_SET)
            t["string"] = io.read(t["length"])

    def _save(self, io):
        """Write header fields, entry table and string data.

        ``offset`` and ``length`` of every entry are recomputed from its
        current ``string`` (and updated in place), so entries may be edited
        freely between load and save.
        """
        # String data follows the 0x12-byte header and one 0x1a-byte record
        # per entry.
        strings_start = len(self) * 0x1a + 0x12
        io.write(pack("<HII", self.language_id, len(self), strings_start))

        chunks = []
        cursor = 0  # offset of the next string within the string data
        for t in self:
            text = t["string"]
            t["length"] = len(text)
            # Empty strings are stored with offset 0.
            t["offset"] = cursor if text else 0

            io.write(pack("<H 8s 4I", t["flag"], t["sound_name"], t["volume"],
                          t["pitch"], t["offset"], t["length"]))
            chunks.append(text)
            cursor += len(text)

        io.write(b"".join(chunks))

    def __str__(self):
        """Return one line per entry: index, the fields in hex, the string."""
        lines = []
        for i, t in enumerate(self):
            lines.append("%d %04x %8s %08x %08x %08x %08x %s" % (
                i, t["flag"], t["sound_name"].strip(b"\x00"), t["volume"],
                t["pitch"], t["offset"], t["length"], t["string"]))
        return "\n".join(lines)

if __name__ == "__main__":
    # Debug helper: dump every entry of the TLK file named on the command line.
    import sys
    # Close the input file as soon as the table has been read.
    with open(sys.argv[1], "rb") as tlk_file:
        t = Tlk(tlk_file)
    # Single-argument call form prints the same under Python 2's print statement.
    print(t)
    # To try a save round-trip, write the table back with t.save(<file opened "wb">).
56 changes: 56 additions & 0 deletions tools/tlk_convert/tlk_convert.py
@@ -0,0 +1,56 @@
#! /usr/bin/env python
#coding=utf-8
from tlk import Tlk

# Characters that must not be separated from the character before them.
PUNCTUATIONS = u",。!“”-…,.!"


def insert_space(utf16_str, interval=1, codec=None):
    """Insert spaces after wide characters so the text can be word-wrapped.

    The text is cut after every character whose code point is above 0x100,
    provided the pending piece is at least *interval* characters long, the
    character is not the last one, and the following character is not in
    ``PUNCTUATIONS``. The pieces are then joined with single spaces.

    When *codec* is given, *utf16_str* is decoded with it first and the
    result is encoded with it again; otherwise text goes in and comes out
    unchanged in type.
    """
    text = utf16_str.decode(codec) if codec else utf16_str

    # NOTE(review): as written this replaces a space with a space; the first
    # literal may originally have been a different whitespace character
    # - confirm against the upstream file.
    text = text.replace(u" ", u" ")

    final_index = len(text) - 1
    pieces = []
    pending = u""
    for pos, ch in enumerate(text):
        pending += ch
        if ord(ch) <= 0x100:
            continue
        if len(pending) < interval:
            continue
        # Never cut after the last character or in front of punctuation.
        if pos >= final_index or text[pos + 1] in PUNCTUATIONS:
            continue
        pieces.append(pending)
        pending = u""
    if pending:
        pieces.append(pending)

    joined = u" ".join(pieces)
    return joined.encode(codec) if codec else joined

def convert_to_utf8(tlk_name, codec = "GBK", need_space = True):
    """Re-encode every string of a TLK file to UTF-8, in place.

    The table is first re-saved to ``tlk_name + ".bak"`` (a re-serialised
    copy written by ``Tlk.save``, not a byte copy of the input), then each
    string is decoded with *codec* and stored back as UTF-8. With
    *need_space* set, ``insert_space`` is applied to the decoded text.

    Strings that cannot be decoded are reported by index on stdout and left
    unchanged. An unknown *codec* name raises ``LookupError`` instead of
    being reported once per string.
    """
    # Read the whole table and close the source before the same path is
    # reopened for writing below.
    with open(tlk_name, "rb") as src:
        tlk = Tlk(src)
    with open(tlk_name + ".bak", "wb") as backup:
        tlk.save(backup)

    for i, t in enumerate(tlk):
        try:
            text = t["string"].decode(codec)
        except UnicodeError:
            # Same output as the former `print "Warning: ", i` statement.
            print("Warning:  %d" % i)
            continue
        if need_space:
            # Work on the decoded text directly; no UTF-8 round-trip needed.
            text = insert_space(text)
        t["string"] = text.encode("utf-8")

    with open(tlk_name, "wb") as dst:
        tlk.save(dst)

if __name__ == "__main__":
    # Command line: <name> [codec] [--disable_space]
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument("name", action="store", nargs=1)
    cli.add_argument("codec", action="store", nargs="?")
    cli.add_argument("--disable_space", action="store_true", default=False)
    options = cli.parse_args()

    # nargs=1 yields a one-element list, hence the [0].
    tlk_path = options.name[0]
    # A missing (or empty) codec argument falls back to GBK.
    source_codec = options.codec or "GBK"

    convert_to_utf8(tlk_path, source_codec, not options.disable_space)

0 comments on commit 9b1fb5c

Please sign in to comment.