Permalink
Browse files

Add u() function for Unicode strings for Python 2/3 shared source

  • Loading branch information...
1 parent 7a7b98c commit e334775b77838a9693409ab8ce52a218cf8f80dc @daviddrysdale committed Nov 26, 2011
Showing with 68 additions and 4 deletions.
  1. +1 −0 python/makefile
  2. +67 −4 python/phonenumbers/util.py
View
@@ -29,6 +29,7 @@ tests/testdata/__init__.py: ../resources/PhoneNumberMetaDataForTesting.xml buil
test: alldata tests/testdata/__init__.py
$(PYTHON) -m phonenumbers.__init__
+ $(PYTHON) -m phonenumbers.util
$(PYTHON) -m phonenumbers.re_util
$(PYTHON) -m phonenumbers.unicode_util
$(PYTHON) -m phonenumbers.geocoder
@@ -1,9 +1,72 @@
+#!/usr/bin/env python
+"""Python 2.x/3.x compatibility utilities.
+
+>>> from util import prnt, u, unichr
+>>> prnt("hello")
+hello
+>>> prnt("hello", "world")
+hello world
+>>> prnt("hello", "world", sep=":")
+hello:world
+>>> prnt("hello", "world", sep=":", end='!\\n')
+hello:world!
+>>> u('\u0101') == unichr(0x0101)
+True
+>>> u('\u0101') == u('\U00000101')
+True
+>>> u('\u0101') == u('\N{LATIN SMALL LETTER A WITH MACRON}')
+True
+"""
import sys
-class UnicodeMixin(object): # pragma no cover
- """Define __str__ operator in terms of __unicode__ for Python 2/3"""
- if sys.version_info >= (3, 0):
+if sys.version_info >= (3, 0): # pragma no cover
+ import builtins
+ print3 = builtins.__dict__['print']
+
+ u = str
+ unicode = str
+ unichr = chr
+
def prnt(*args, **kwargs):
    """Print *args* through the Python 3 print function.

    Recognised keyword arguments are ``sep`` (default ``' '``), ``end``
    (default ``'\\n'``) and ``file`` (default ``None``, which the print
    function treats as ``sys.stdout``).  Any other keyword is ignored.
    """
    # print3 is the builtin print function, looked up by name elsewhere in
    # this module so that the source still parses under Python 2.
    print3(*args,
           sep=kwargs.get('sep', ' '),
           end=kwargs.get('end', '\n'),
           file=kwargs.get('file', None))
+
class UnicodeMixin(object):
    """Mixin that defines ``__str__`` in terms of ``__unicode__`` (Python 3).

    Subclasses provide ``__unicode__``; ``str(obj)`` returns its result
    unchanged.
    """

    def __str__(self):
        return self.__unicode__()
- else:
+
+else: # pragma no cover
+ import unicodedata
+ import re
# \N{name} = character named name in the Unicode database
_UNAME_RE = re.compile(r'\\N\{(?P<name>[^}]+)\}')
# \uxxxx = character with 16-bit hex value xxxx
_U16_RE = re.compile(r'\\u(?P<hexval>[0-9a-fA-F]{4})')
# \Uxxxxxxxx = character with 32-bit hex value xxxxxxxx
_U32_RE = re.compile(r'\\U(?P<hexval>[0-9a-fA-F]{8})')
# Any one of the three escape forms above.  u() uses this single alternation
# so that the input is scanned exactly once; the individual patterns are kept
# for any code that refers to them by name.
_UESCAPE_RE = re.compile(
    r'\\u(?P<hex16>[0-9a-fA-F]{4})'
    r'|\\U(?P<hex32>[0-9a-fA-F]{8})'
    r'|\\N\{(?P<name>[^}]+)\}')


def _expand_escape(match):
    """Return the character denoted by one _UESCAPE_RE match."""
    name = match.group('name')
    if name is not None:
        return unicodedata.lookup(name)
    # Exactly one of the two hex groups is set when 'name' is not.
    hexval = match.group('hex16') or match.group('hex32')
    return unichr(int(hexval, 16))


def u(s):
    """Return *s* as a unicode string with Unicode escapes expanded.

    The escapes ``\\uXXXX``, ``\\UXXXXXXXX`` and ``\\N{name}`` found in *s*
    are replaced by the characters they denote, which lets source shared
    between Python 2 and Python 3 write ``u('\\u0101')``.

    All escapes are expanded in a single left-to-right pass.  Text produced
    by one expansion is never re-read as the start of another escape, so
    ``\\u005cN{DIGIT ONE}`` yields a backslash followed by ``N{DIGIT ONE}``
    rather than ``1``.

    Raises KeyError if a ``\\N{name}`` names no known character, and whatever
    ``unichr`` raises for a code point it cannot represent.
    """
    return _UESCAPE_RE.sub(_expand_escape, unicode(s))
+
+ unichr = unichr
+
def prnt(*args, **kwargs):
    """Write *args* to a stream in the manner of the Python 3 print function.

    Recognised keyword arguments:
      sep  -- string placed between arguments (default ``' '``)
      end  -- string written after the last argument (default ``'\\n'``)
      file -- object with a ``write`` method (default ``sys.stdout``)

    Each argument is converted with ``'%s'`` formatting, so non-string
    values are accepted just as the print function accepts them.
    """
    sep = kwargs.get('sep', ' ')
    end = kwargs.get('end', '\n')
    stream = kwargs.get('file', None)
    if stream is None:
        stream = sys.stdout
    # '%s' % (arg,) stringifies any object; the 1-tuple keeps tuple arguments
    # from being unpacked as format operands.
    text = sep.join(['%s' % (arg,) for arg in args])
    # Write directly instead of using "print >> stream, text," -- the
    # trailing-comma print statement leaves the stream's softspace flag set
    # when *end* does not finish with a newline, and the next print statement
    # on that stream then emits a stray leading space.
    stream.write(text + end)
+
class UnicodeMixin(object):  # pragma no cover
    """Mixin that defines ``__str__`` in terms of ``__unicode__`` (Python 2).

    ``str(obj)`` returns ``unicode(obj)`` encoded as UTF-8 bytes.
    """

    def __str__(self):
        return unicode(self).encode('utf-8')
+
+
+if __name__ == '__main__': # pragma no cover
+ import doctest
+ doctest.testmod()

0 comments on commit e334775

Please sign in to comment.