From c2789dd620a6f6d4c08e4608e94656a0d2b906c7 Mon Sep 17 00:00:00 2001
From: Lucas Wiman
Date: Sat, 11 Feb 2017 12:20:56 -0800
Subject: [PATCH] Fix a bug in unicode encoding of reprs.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous implementation could lead to double-decoding:

    >>> from parsimonious.utils import Token
    >>> repr(Token('asdf'))
    '<Token "asdf">'
    >>> repr(Token(u'💣'))
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
      File "parsimonious/utils.py", line 19, in __repr__
        return self.__str__().encode('utf-8')
    UnicodeDecodeError: 'ascii' codec can't decode byte 0xf0 in position 8: ordinal not in range(128)
---
 parsimonious/utils.py | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/parsimonious/utils.py b/parsimonious/utils.py
index e7087bd..bb3e217 100644
--- a/parsimonious/utils.py
+++ b/parsimonious/utils.py
@@ -1,22 +1,15 @@
 """General tools which don't depend on other parts of Parsimonious"""
 
 import ast
-from sys import version_info
 
 from six import python_2_unicode_compatible
 
 
 class StrAndRepr(object):
-    """Mix-in to add a ``__str__`` and ``__repr__`` which return the
-    UTF-8-encoded value of ``__unicode__``"""
-
-    if version_info >= (3,):
-        # Don't return the "bytes" type from Python 3's __str__:
-        def __repr__(self):
-            return self.__str__()
-    else:
-        def __repr__(self):
-            return self.__str__().encode('utf-8')
+    """Mix-in to which gives the class the same __repr__ and __str__."""
+
+    def __repr__(self):
+        return self.__str__()
 
 
 def evaluate_string(string):