Skip to content

Commit

Permalink
Fix a bug in unicode encoding of reprs.
Browse files Browse the repository at this point in the history
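The previous implementation could double-encode under Python 2: ``.encode('utf-8')`` was called on the already UTF-8-encoded byte string returned by ``__str__``, which forces an implicit ASCII decode that fails on non-ASCII text: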
    >>> from parsimonious.utils import Token
    >>> repr(Token('asdf'))
    '<Token "asdf">'
    >>> repr(Token(u'💣'))
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
      File "parsimonious/utils.py", line 19, in __repr__
        return self.__str__().encode('utf-8')
    UnicodeDecodeError: 'ascii' codec can't decode byte 0xf0 in position 8: ordinal not in range(128)
  • Loading branch information
lucaswiman committed Feb 11, 2017
1 parent aba7c13 commit c2789dd
Showing 1 changed file with 4 additions and 11 deletions.
15 changes: 4 additions & 11 deletions parsimonious/utils.py
@@ -1,22 +1,15 @@
"""General tools which don't depend on other parts of Parsimonious"""

import ast
from sys import version_info

from six import python_2_unicode_compatible


class StrAndRepr(object):
    """Mix-in which gives the class the same ``__repr__`` and ``__str__``.

    ``__repr__`` returns whatever ``__str__`` returns, with no additional
    encoding step.  An earlier Python 2 branch called ``.encode('utf-8')``
    on that result; when ``__str__`` had already produced UTF-8 bytes, the
    call forced an implicit ASCII decode and raised ``UnicodeDecodeError``
    for non-ASCII text.
    """

    def __repr__(self):
        """Return the same value as ``__str__``, unchanged."""
        # No interpreter-version check and no re-encoding here: __str__ is
        # expected to already yield the native ``str`` type (presumably via
        # six's python_2_unicode_compatible on subclasses -- TODO confirm).
        return self.__str__()


def evaluate_string(string):
Expand Down

0 comments on commit c2789dd

Please sign in to comment.