diff --git a/.travis.yml b/.travis.yml index 2591be6..9e43467 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,8 @@ language: python python: - '2.7' + - '3.5' + - '3.6' sudo: false install: python setup.py install diff --git a/rich_text_diff/__init__.py b/rich_text_diff/__init__.py index 9427e5c..447b77f 100644 --- a/rich_text_diff/__init__.py +++ b/rich_text_diff/__init__.py @@ -1,16 +1,21 @@ # -*- coding: utf-8 -*- import copy +import sys import logging import re -import urllib from bidict import bidict import diff_match_patch as dmp_module from lxml.html import fromstring, tostring, fragment_fromstring from lxml import etree +if sys.version_info < (3,): + chr = unichr + unicode_type = unicode +else: + unicode_type = str -UNICODE_KEY = [unichr(item) for item in range(0xE000, 0xFFFF + 1)] +UNICODE_KEY = [chr(item) for item in range(0xE000, 0xFFFF + 1)] # unicode spec not in use DMP = dmp_module.diff_match_patch() @@ -45,23 +50,23 @@ def _map_tag(self, content): self.tag_map[self.code_key.pop()] = tag def _map_media_tag(self, element, raw_tag): - url = urlencode(element.attrib) - if url in self.media_url.values(): - code = self.media_url.inv[url] + tag_key = gen_tag_key(element.attrib) + if tag_key in self.media_url.values(): + code = self.media_url.inv[tag_key] self.tag_map[code].append(raw_tag) return code = self.code_key.pop() self.tag_map[code] = [raw_tag] - self.media_url[code] = url + self.media_url[code] = tag_key def _replace(self, new_content, old_content): self._map_tag(new_content) - for code, tag in self.tag_map.iteritems(): + for code, tag in self.tag_map.items(): if not isinstance(tag, list): tag = [tag] for item in tag: new_content = new_content.replace(item, code) - for code, tag in self.tag_map.iteritems(): + for code, tag in self.tag_map.items(): if not isinstance(tag, list): tag = [tag] for item in tag: @@ -69,7 +74,7 @@ def _replace(self, new_content, old_content): return to_unicode(new_content), to_unicode(old_content) def _recover(self, content): - for code, tag in self.tag_map.iteritems(): + for code, tag in self.tag_map.items(): if isinstance(tag, list): tag = tag[0] content = content.replace(code, tag) @@ -88,15 +93,16 @@ def _diff(self, old_content, new_content): for (op, data) in diffs: text = self._recover(data) if op == self.INSERT: - html.append("{}".format(text)) + html.append(u"{}".format(text)) elif op == self.DELETE: - html.append("{}".format(text)) + html.append(u"{}".format(text)) elif op == self.EQUAL: html.append(text) - return "".join(html) + return utf8(u"".join(html)) -_TO_UNICODE_TYPES = (unicode, type(None)) +_TO_UNICODE_TYPES = (unicode_type, type(None)) +_UTF8_TYPES = (bytes, type(None)) def to_unicode(value): @@ -109,21 +115,23 @@ def to_unicode(value): return value.decode("utf-8") +def utf8(value): + if isinstance(value, _UTF8_TYPES): + return value + if not isinstance(value, unicode_type): + raise TypeError("Expected bytes, unicode, or None; got %r" % type(value)) + return value.encode("utf-8") + + def ensure_closed_tag(html): try: element = fromstring(html) except etree.ParserError as e: - logging.warn('fromstring error: {}, use fragment_fromstring'.format(e)) + logging.warning('fromstring error: {}, use fragment_fromstring'.format(e)) element = fragment_fromstring(html, create_parent='div') - return tostring(element, encoding='utf-8') + return to_unicode(tostring(element, encoding='utf-8')) -def urlencode(query): - l = [] - for k, v in query.items(): - if isinstance(v, unicode): - v = v.encode('utf-8') - k = urllib.quote_plus(str(k)) - v = urllib.quote_plus(str(v)) - l.append(k + '=' + v) +def gen_tag_key(query): + l = ["{}={}".format(to_unicode(k), to_unicode(v)) for k, v in query.items()] return '&'.join(l) diff --git a/setup.py b/setup.py index ad47b7d..1e43542 100644 --- a/setup.py +++ b/setup.py @@ -3,13 +3,13 @@ setup( name='rich_text_diff', - version='0.0.5', + version='0.0.6', author='liukai', author_email='liukai@zhihu.com', description='support rich text diff', packages=['rich_text_diff'], test_suite='nose.collector', - python_requires='>=2.7', + python_requires='>=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*', long_description='readme', long_description_content_type='text/markdown', url='https://github.com/c1ay/rich_text_diff',