Skip to content

Commit

Permalink
python: use utf_%d_be encoding on bigendian archs
Browse files Browse the repository at this point in the history
  • Loading branch information
danigm committed Apr 11, 2024
1 parent 483d1aa commit e11dfbc
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 2 deletions.
5 changes: 3 additions & 2 deletions python/louis/__init__.py.in
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ function for information about how liblouis searches for these tables.
@author: Andre-Abush Clause <dev@andreabc.net>
"""

from sys import getfilesystemencoding, platform, version_info
from sys import byteorder, getfilesystemencoding, platform, version_info
from atexit import register
from ctypes import (
c_ushort,
Expand All @@ -60,6 +60,7 @@ except ImportError: # Unix/Cygwin
_loader, _functype = cdll, CFUNCTYPE
liblouis = _loader["###LIBLOUIS_SONAME###"]
_is_windows = platform == "win32"
_endianness = "be" if byteorder == "big" else "le"

# { Module Configuration
#: Specifies the charSize (in bytes) used by liblouis.
Expand All @@ -78,7 +79,7 @@ outlenMultiplier = 4 + wideCharBytes * 2
fileSystemEncoding = "mbcs" if _is_windows else getfilesystemencoding()
#: Specifies the encoding to use when converting from byte strings to unicode strings.
#: @type: str
conversionEncoding = "utf_%d_le" % (wideCharBytes * 8)
conversionEncoding = "utf_%d_%s" % (wideCharBytes * 8, _endianness)
# }

# Some general utility functions
Expand Down
16 changes: 16 additions & 0 deletions python/tests/test_louis.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,5 +96,21 @@ def test_13(self):
def test_14(self):
    """translateString renders the surrogate-pair emoji as its quoted description."""
    table_list = ["en-ueb-g1.ctb", "tests/test.cti"]
    # The input spells the emoji as an explicit UTF-16 surrogate pair.
    source_text = "a \ud83e\udd23 b"
    expected_text = 'a "<rolling on the floor laughing"> b'
    actual_text = louis.translateString(table_list, source_text)
    self.assertEqual(actual_text, expected_text)

class TestEndianness(unittest.TestCase):
    """Check that ``louis.conversionEncoding`` agrees with the host byte order."""

    # Input and expected output shared by both tests.
    _TEXT = "abcdefghijklmnopqrstuvwxyz"
    _BRAILLE = "⠁⠃⠉⠙⠑⠋⠛⠓⠊⠚⠅⠇⠍⠝⠕⠏⠟⠗⠎⠞⠥⠧⠺⠭⠽⠵"

    def _translate_alphabet(self):
        """Return the translated text (first element of ``louis.translate``'s result)."""
        return louis.translate(["unicode.dis", "en-chardefs.cti"], self._TEXT)[0]

    def test_1(self):
        """With the default encoding the alphabet maps to the expected braille cells."""
        self.assertEqual(self._translate_alphabet(), self._BRAILLE)

    def test_2(self):
        """With the byte order inverted, translating must raise UnicodeDecodeError."""
        original_encoding = louis.conversionEncoding
        flipped = "le" if louis._endianness == "be" else "be"
        louis.conversionEncoding = "utf_%d_%s" % (louis.wideCharBytes * 8, flipped)
        # NOTE(review): presumably this relies on byte-swapped code units being
        # undecodable; confirm it also holds when wideCharBytes is 2.
        try:
            with self.assertRaises(UnicodeDecodeError):
                self._translate_alphabet()
        finally:
            # conversionEncoding is module-global state: restore it even when
            # the assertion above fails, so later tests are not run with the
            # inverted encoding.
            louis.conversionEncoding = original_encoding

# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()

0 comments on commit e11dfbc

Please sign in to comment.