Skip to content

Commit

Permalink
Add u() function for Unicode strings for Python 2/3 shared source
Browse files Browse the repository at this point in the history
  • Loading branch information
daviddrysdale committed Nov 26, 2011
1 parent 7a7b98c commit e334775
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 4 deletions.
1 change: 1 addition & 0 deletions python/makefile
Expand Up @@ -29,6 +29,7 @@ tests/testdata/__init__.py: ../resources/PhoneNumberMetaDataForTesting.xml buil

test: alldata tests/testdata/__init__.py
$(PYTHON) -m phonenumbers.__init__
$(PYTHON) -m phonenumbers.util
$(PYTHON) -m phonenumbers.re_util
$(PYTHON) -m phonenumbers.unicode_util
$(PYTHON) -m phonenumbers.geocoder
Expand Down
71 changes: 67 additions & 4 deletions python/phonenumbers/util.py
@@ -1,9 +1,72 @@
#!/usr/bin/env python
"""Python 2.x/3.x compatibility utilities.
>>> from util import prnt, u, unichr
>>> prnt("hello")
hello
>>> prnt("hello", "world")
hello world
>>> prnt("hello", "world", sep=":")
hello:world
>>> prnt("hello", "world", sep=":", end='!\\n')
hello:world!
>>> u('\u0101') == unichr(0x0101)
True
>>> u('\u0101') == u('\U00000101')
True
>>> u('\u0101') == u('\N{LATIN SMALL LETTER A WITH MACRON}')
True
"""
import sys


class UnicodeMixin(object): # pragma no cover
"""Define __str__ operator in terms of __unicode__ for Python 2/3"""
if sys.version_info >= (3, 0):
if sys.version_info >= (3, 0): # pragma no cover
import builtins
# `print` is a statement keyword under Python 2, so this shared source cannot
# name the function directly; look it up in the builtins namespace instead.
print3 = builtins.__dict__['print']

# On Python 3 every str is already Unicode text, so the compatibility names
# are plain aliases of the corresponding builtins.
u = str
unicode = str
unichr = chr

def prnt(*args, **kwargs):
    """Write *args* using the Python 3 ``print`` function.

    Only the ``sep`` (default ``' '``), ``end`` (default ``'\\n'``) and
    ``file`` (default ``None``) keywords are honoured; any other keyword
    argument is silently ignored.
    """
    print3(
        *args,
        sep=kwargs.get('sep', ' '),
        end=kwargs.get('end', '\n'),
        file=kwargs.get('file', None)
    )

class UnicodeMixin(object):
    """Mixin that derives ``__str__`` from the subclass's ``__unicode__``.

    This is the Python 3 variant: ``__unicode__`` is expected to return text,
    which is handed back unchanged.
    """

    def __str__(self):
        """Return whatever ``self.__unicode__()`` produces."""
        return self.__unicode__()
else:

else: # pragma no cover
import unicodedata
import re
# \N{name} = character named name in the Unicode database
_UNAME_RE = re.compile(r'\\N\{(?P<name>[^}]+)\}')
# \uxxxx = character with 16-bit hex value xxxx
_U16_RE = re.compile(r'\\u(?P<hexval>[0-9a-fA-F]{4})')
# \Uxxxxxxxx = character with 32-bit hex value xxxxxxxx
_U32_RE = re.compile(r'\\U(?P<hexval>[0-9a-fA-F]{8})')
# The three patterns above are retained unchanged in case other code refers to
# them.  u() itself uses this single alternation so that every escape is
# expanded in one left-to-right pass over the input.
_ESCAPE_RE = re.compile(
    r'\\u(?P<u16>[0-9a-fA-F]{4})'
    r'|\\U(?P<u32>[0-9a-fA-F]{8})'
    r'|\\N\{(?P<name>[^}]+)\}')


def _expand_escape(match):
    """Return the character denoted by one escape matched by _ESCAPE_RE."""
    name = match.group('name')
    if name is not None:
        return unicodedata.lookup(name)
    # Exactly one of the two hex groups is set when `name` is not.
    hexval = match.group('u16') or match.group('u32')
    return unichr(int(hexval, 16))


def u(s):
    """Convert *s* to Unicode, expanding \\uxxxx, \\Uxxxxxxxx and \\N{name}.

    The escapes are recognised as literal backslash sequences in *s*, which
    is first passed through ``unicode()``.  Expansion happens in a single
    pass, so text produced by one escape is never re-read as the start of
    another (previously ``\\u005cU00000041`` was expanded twice, to ``A``).

    Raises KeyError (from ``unicodedata.lookup``) for an unknown character
    name.
    """
    return _ESCAPE_RE.sub(_expand_escape, unicode(s))

# Bind the builtin as a module attribute so that `from util import unichr`
# (as done in the module doctest) works on this branch too.
unichr = unichr

def prnt(*args, **kwargs):
    """Emulate the Python 3 ``print`` function using the print statement.

    Honours the ``sep`` (default ``' '``), ``end`` (default ``'\\n'``) and
    ``file`` (default ``sys.stdout``) keywords; any other keyword argument
    is silently ignored.  Arguments that are not strings are converted to
    text before joining, as the ``print`` function does.
    """
    sep = kwargs.get('sep', ' ')
    end = kwargs.get('end', '\n')
    file = kwargs.get('file', None)
    if file is None:
        file = sys.stdout
    # "%s" formatting returns string arguments unchanged and converts any
    # other object to text; sep.join() on the raw args raised TypeError for
    # non-string arguments.
    text = sep.join(["%s" % (arg,) for arg in args])
    # The trailing comma suppresses the statement's own newline so that
    # `end` alone decides how the output terminates.
    # NOTE(review): per the Python 2 print statement's soft-space rule, an
    # `end` that does not finish with a newline may cause the next print to
    # the same file to start with a space -- confirm whether callers care.
    print >> file, text + end,

class UnicodeMixin(object):  # pragma no cover
    """Mixin that derives a byte-string ``__str__`` from ``unicode(self)``.

    This is the variant used when the interpreter is older than Python 3:
    the Unicode form of the object is encoded as UTF-8.
    """

    def __str__(self):
        """Return ``unicode(self)`` encoded as UTF-8."""
        return unicode(self).encode('utf-8')


if __name__ == '__main__':  # pragma no cover
    # Self-test entry point: run the doctests embedded in this module.
    from doctest import testmod
    testmod()

0 comments on commit e334775

Please sign in to comment.