Skip to content

Commit

Permalink
First version with passing tests on Python 2.6, 2.7 and 3.2
Browse files Browse the repository at this point in the history
  • Loading branch information
Hanno Schlichting committed Dec 22, 2011
1 parent 548c0b0 commit 6945966
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 63 deletions.
4 changes: 3 additions & 1 deletion CHANGES.txt
@@ -1,9 +1,11 @@
Changelog
=========

1.3 - unreleased
2.0 - unreleased
----------------

- Python 2 and 3 compatibility in the same codebase.
[hannosch]

1.2 - 2011-11-01
----------------
Expand Down
126 changes: 74 additions & 52 deletions pythongettext/msgfmt.py
Expand Up @@ -33,6 +33,7 @@

import array
import codecs
from email.parser import HeaderParser
import struct
import sys

Expand All @@ -41,18 +42,26 @@
def b(s):
    """Encode the text *s* as latin-1 and return the encoded result."""
    latin1_encoded = s.encode("latin-1")
    return latin1_encoded

def u(s):
def u(s, enc=None):
return s

def header_charset(s):
    """Return the charset declared in the header text *s*.

    The text is parsed as message headers and the result is whatever
    ``get_content_charset()`` reports for the parsed message.
    """
    message = HeaderParser().parsestr(s)
    return message.get_content_charset()

import io
BytesIO = io.BytesIO
FILE_TYPE = io.IOBase
else:
def b(s):
    """Return *s* unchanged; this variant applies no conversion."""
    unchanged = s
    return unchanged

def u(s):
return unicode(s, "unicode_escape")
def u(s, enc="unicode_escape"):
return unicode(s, enc)

def header_charset(s):
    """Return the charset declared in the header text *s*.

    *s* is first encoded to UTF-8, ignoring characters that cannot be
    encoded, and the encoded text is then parsed as message headers.
    """
    encoded = s.encode('utf-8', 'ignore')
    message = HeaderParser().parsestr(encoded)
    return message.get_content_charset()

from cStringIO import StringIO as BytesIO
FILE_TYPE = file
Expand All @@ -75,6 +84,9 @@ def __init__(self, po, name='unknown'):
self.name = name
self.messages = {}
self.openfile = False
# Start off assuming latin-1, so everything decodes without failure,
# until we know the exact encoding
self.encoding = 'latin-1'

def readPoData(self):
""" read po data from self.po and return an iterator """
Expand All @@ -99,13 +111,18 @@ def readPoData(self):
return [first] + output.readlines()
return output

def add(self, context, id, str, fuzzy):
def add(self, context, id, string, fuzzy):
"Add a non-empty and non-fuzzy translation to the dictionary."
if str and not fuzzy:
if string and not fuzzy:
# The context is put before the id and separated by an EOT char.
if context:
id = context + '\x04' + id
self.messages[id] = str
id = context + u('\x04') + id
self.messages[id] = string
if not id:
# See whether there is an encoding declaration
charset = header_charset(string)
if charset:
self.encoding = charset

def generate(self):
"Return the generated output."
Expand All @@ -114,12 +131,14 @@ def generate(self):
offsets = []
ids = strs = b('')
for id in keys:
msg = self.messages[id].encode(self.encoding)
id = id.encode(self.encoding)
# For each string, we need size and file offset. Each string is
# NUL terminated; the NUL does not count into the size.
offsets.append((len(ids), len(id), len(strs),
len(self.messages[id])))
len(msg)))
ids += id + b('\0')
strs += self.messages[id] + b('\0')
strs += msg + b('\0')
output = b('')
# The header is 7 32-bit unsigned integers. We don't use hash tables,
# so the keys start right after the index tables.
Expand All @@ -144,7 +163,10 @@ def generate(self):
7 * 4, # start of key index
7 * 4 + len(keys) * 8, # start of value index
0, keystart) # size and offset of hash table
output += array.array("i", offsets).tostring()
if PY3:
output += array.array("i", offsets).tobytes()
else:
output += array.array("i", offsets).tostring()
output += ids
output += strs
return output
Expand All @@ -163,17 +185,17 @@ def read(self, header_only=False):

section = None
fuzzy = 0
msgid = msgstr = msgctxt = ''
msgid = msgstr = msgctxt = u('')

# Parse the catalog
lno = 0
for l in self.readPoData():
l = l.decode(self.encoding)
lno += 1
# If we get a comment line after a msgstr or a line starting with
# msgid or msgctxt, this is a new entry
if section == STR and (l[0] == '#' or (l[0] == 'm' and
(l.startswith('msgctxt') or l.startswith('msgid')))):

self.add(msgctxt, msgid, msgstr, fuzzy)
section = None
fuzzy = 0
Expand All @@ -187,47 +209,46 @@ def read(self, header_only=False):
if l[0] == '#':
continue
# Now we are in a msgctxt section
elif l[0] == 'm':
if l.startswith('msgctxt'):
section = CTXT
l = l[7:]
msgctxt = ''
# Now we are in a msgid section, output previous section
elif (l.startswith('msgid') and
not l.startswith('msgid_plural')):
if section == STR:
self.add(msgid, msgstr, fuzzy)
section = ID
l = l[5:]
msgid = msgstr = ''
is_plural = False
# This is a message with plural forms
elif l.startswith('msgid_plural'):
if section != ID:
raise PoSyntaxError('msgid_plural not preceeded by '
'msgid on line %d of po file %s' %
if l.startswith('msgctxt'):
section = CTXT
l = l[7:]
msgctxt = u('')
# Now we are in a msgid section, output previous section
elif (l.startswith('msgid') and
not l.startswith('msgid_plural')):
if section == STR:
self.add(msgid, msgstr, fuzzy)
section = ID
l = l[5:]
msgid = msgstr = u('')
is_plural = False
# This is a message with plural forms
elif l.startswith('msgid_plural'):
if section != ID:
raise PoSyntaxError('msgid_plural not preceeded by '
'msgid on line %d of po file %s' %
(lno, repr(self.name)))
l = l[12:]
msgid += b('\0') # separator of singular and plural
is_plural = True
# Now we are in a msgstr section
elif l.startswith('msgstr'):
section = STR
if l.startswith('msgstr['):
if not is_plural:
raise PoSyntaxError('plural without msgid_plural '
'on line %d of po file %s' %
(lno, repr(self.name)))
l = l.split(']', 1)[1]
if msgstr:
# Separator of the various plural forms
msgstr += b('\0')
else:
if is_plural:
raise PoSyntaxError('indexed msgstr required for '
'plural on line %d of po file %s' %
(lno, repr(self.name)))
l = l[12:]
msgid += '\0' # separator of singular and plural
is_plural = True
# Now we are in a msgstr section
elif l.startswith('msgstr'):
section = STR
if l.startswith('msgstr['):
if not is_plural:
raise PoSyntaxError('plural without msgid_plural '
'on line %d of po file %s' %
(lno, repr(self.name)))
l = l.split(']', 1)[1]
if msgstr:
# Separator of the various plural forms
msgstr += '\0'
else:
if is_plural:
raise PoSyntaxError('indexed msgstr required for '
'plural on line %d of po file %s' %
(lno, repr(self.name)))
l = l[6:]
l = l[6:]
# Skip empty lines
l = l.strip()
if not l:
Expand All @@ -238,6 +259,7 @@ def read(self, header_only=False):
except Exception as msg:
raise PoSyntaxError('%s (line %d of po file %s): \n%s' %
(msg, lno, repr(self.name), l))
l = u(l, self.encoding)
if section == CTXT:
msgctxt += l
elif section == ID:
Expand Down
33 changes: 24 additions & 9 deletions pythongettext/tests/test_compile.py
Expand Up @@ -35,7 +35,7 @@ def compare_po_mo(self, poname, moname):
po_file = open(os.path.join(FOLDER, poname), 'rb')
po = Msgfmt(po_file).get()
mo_file = open(os.path.join(FOLDER, moname), 'rb')
mo = ''.join(mo_file.readlines())
mo = b('').join(mo_file.readlines())
finally:
if po_file is not None:
po_file.close()
Expand All @@ -60,26 +60,41 @@ def test_test4(self):
po_file = open(os.path.join(FOLDER, 'test4.po'), 'rb')
po = Msgfmt(po_file)
po.read(header_only=True)
self.assertTrue(po.messages[''].startswith('Project-Id-Version: foo'))
po_file.close()
self.assertTrue(
po.messages[u('')].startswith('Project-Id-Version: foo'))
self.assertEqual(po.encoding, u('iso-8859-1'))

def test_test5(self):
po_file = open(os.path.join(FOLDER, 'test5.po'), 'rb')
po = Msgfmt(po_file)
with self.assertRaises(PoSyntaxError):
po.read()
try:
with self.assertRaises(PoSyntaxError):
po.read()
finally:
po_file.close()
self.assertEqual(po.encoding, u('utf-8'))

def test_test5_unicode_name(self):
po_file = open(os.path.join(FOLDER, 'test5.po'), 'rb')
po = Msgfmt(po_file, name=u('dømain', 'utf-8'))
with self.assertRaises(PoSyntaxError):
po.read()
try:
with self.assertRaises(PoSyntaxError):
po.read()
finally:
po_file.close()
self.assertEqual(po.encoding, u('utf-8'))

def test_escape(self):
po_file = open(os.path.join(FOLDER, 'test_escape.po'), 'rb')
po = Msgfmt(po_file)
with self.assertRaises(PoSyntaxError) as e:
po.read()
self.assertTrue('line 19' in e.exception.msg)
try:
with self.assertRaises(PoSyntaxError) as e:
po.read()
self.assertTrue('line 19' in e.exception.msg)
self.assertEqual(po.encoding, u('utf-8'))
finally:
po_file.close()

def test_unicode_bom(self):
    # Run the shared po/mo comparison helper on the unicode BOM fixtures.
    po_name = 'test_unicode_bom.po'
    mo_name = 'test_unicode_bom.mo'
    self.compare_po_mo(po_name, mo_name)
4 changes: 3 additions & 1 deletion setup.py
Expand Up @@ -2,7 +2,7 @@
import sys
from setuptools import setup

version = '1.3dev'
version = '2.0dev'

PY3 = sys.version_info[0] == 3
install_requires = []
Expand All @@ -28,6 +28,8 @@
'Programming Language :: Python',
'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.2',
'Topic :: Software Development :: Internationalization',
'Topic :: Software Development :: Localization',
],
Expand Down

0 comments on commit 6945966

Please sign in to comment.