From c94c32d24edcfe24ebb28cd7060cbede60015e69 Mon Sep 17 00:00:00 2001 From: c1ay Date: Mon, 24 Dec 2018 15:19:03 +0800 Subject: [PATCH 1/3] feature: support python3.5 python3.6, closes #5 --- .travis.yml | 2 ++ rich_text_diff/__init__.py | 37 ++++++++++++++++++------------------- setup.py | 2 +- 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/.travis.yml b/.travis.yml index 2591be6..9e43467 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,8 @@ language: python python: - '2.7' + - '3.5' + - '3.6' sudo: false install: python setup.py install diff --git a/rich_text_diff/__init__.py b/rich_text_diff/__init__.py index 9427e5c..8ef75c8 100644 --- a/rich_text_diff/__init__.py +++ b/rich_text_diff/__init__.py @@ -1,16 +1,21 @@ # -*- coding: utf-8 -*- import copy +import sys import logging import re -import urllib from bidict import bidict import diff_match_patch as dmp_module from lxml.html import fromstring, tostring, fragment_fromstring from lxml import etree +if sys.version_info < (3,): + chr = unichr + unicode_type = unicode +else: + unicode_type = str -UNICODE_KEY = [unichr(item) for item in range(0xE000, 0xFFFF + 1)] +UNICODE_KEY = [chr(item) for item in range(0xE000, 0xFFFF + 1)] # unicode spec not in use DMP = dmp_module.diff_match_patch() @@ -45,23 +50,23 @@ def _map_tag(self, content): self.tag_map[self.code_key.pop()] = tag def _map_media_tag(self, element, raw_tag): - url = urlencode(element.attrib) - if url in self.media_url.values(): - code = self.media_url.inv[url] + tag_key = gen_tag_key(element.attrib) + if tag_key in self.media_url.values(): + code = self.media_url.inv[tag_key] self.tag_map[code].append(raw_tag) return code = self.code_key.pop() self.tag_map[code] = [raw_tag] - self.media_url[code] = url + self.media_url[code] = tag_key def _replace(self, new_content, old_content): self._map_tag(new_content) - for code, tag in self.tag_map.iteritems(): + for code, tag in self.tag_map.items(): if not isinstance(tag, list): tag = [tag] for item in tag: new_content = new_content.replace(item, code) - for code, tag in self.tag_map.iteritems(): + for code, tag in self.tag_map.items(): if not isinstance(tag, list): tag = [tag] for item in tag: @@ -69,7 +74,7 @@ def _replace(self, new_content, old_content): return to_unicode(new_content), to_unicode(old_content) def _recover(self, content): - for code, tag in self.tag_map.iteritems(): + for code, tag in self.tag_map.items(): if isinstance(tag, list): tag = tag[0] content = content.replace(code, tag) @@ -96,7 +101,7 @@ def _diff(self, old_content, new_content): return "".join(html) -_TO_UNICODE_TYPES = (unicode, type(None)) +_TO_UNICODE_TYPES = (unicode_type, type(None)) def to_unicode(value): @@ -115,15 +120,9 @@ def ensure_closed_tag(html): except etree.ParserError as e: logging.warn('fromstring error: {}, use fragment_fromstring'.format(e)) element = fragment_fromstring(html, create_parent='div') - return tostring(element, encoding='utf-8') + return to_unicode(tostring(element, encoding='utf-8')) -def urlencode(query): - l = [] - for k, v in query.items(): - if isinstance(v, unicode): - v = v.encode('utf-8') - k = urllib.quote_plus(str(k)) - v = urllib.quote_plus(str(v)) - l.append(k + '=' + v) +def gen_tag_key(query): + l = ["{}={}".format(to_unicode(k), to_unicode(v)) for k, v in query.items()] return '&'.join(l) diff --git a/setup.py b/setup.py index ad47b7d..b95c931 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ description='support rich text diff', packages=['rich_text_diff'], test_suite='nose.collector', - python_requires='>=2.7', + python_requires='>=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*', long_description='readme', long_description_content_type='text/markdown', url='https://github.com/c1ay/rich_text_diff', From c0c9e226107d393a6c44b335aaef0cddb4dd8af3 Mon Sep 17 00:00:00 2001 From: c1ay Date: Mon, 24 Dec 2018 15:35:33 +0800 Subject: [PATCH 2/3] fix: unicode --- rich_text_diff/__init__.py | 11 ++++++++++- setup.py | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/rich_text_diff/__init__.py b/rich_text_diff/__init__.py index 8ef75c8..b544444 100644 --- a/rich_text_diff/__init__.py +++ b/rich_text_diff/__init__.py @@ -78,7 +78,7 @@ def _recover(self, content): if isinstance(tag, list): tag = tag[0] content = content.replace(code, tag) - return ensure_closed_tag(content) + return utf8(ensure_closed_tag(content)) def diff(self): if self.new_content == self.old_content: @@ -102,6 +102,7 @@ def _diff(self, old_content, new_content): _TO_UNICODE_TYPES = (unicode_type, type(None)) +_UTF8_TYPES = (bytes, type(None)) def to_unicode(value): @@ -114,6 +115,14 @@ def to_unicode(value): return value.decode("utf-8") +def utf8(value): + if isinstance(value, _UTF8_TYPES): + return value + if not isinstance(value, unicode_type): + raise TypeError("Expected bytes, unicode, or None; got %r" % type(value)) + return value.encode("utf-8") + + def ensure_closed_tag(html): try: element = fromstring(html) diff --git a/setup.py b/setup.py index b95c931..1e43542 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ setup( name='rich_text_diff', - version='0.0.5', + version='0.0.6', author='liukai', author_email='liukai@zhihu.com', description='support rich text diff', From e08037a4e89abc59a39daeba23ab85e6e974a678 Mon Sep 17 00:00:00 2001 From: c1ay Date: Mon, 24 Dec 2018 15:42:43 +0800 Subject: [PATCH 3/3] fix ci --- rich_text_diff/__init__.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rich_text_diff/__init__.py b/rich_text_diff/__init__.py index b544444..447b77f 100644 --- a/rich_text_diff/__init__.py +++ b/rich_text_diff/__init__.py @@ -78,7 +78,7 @@ def _recover(self, content): if isinstance(tag, list): tag = tag[0] content = content.replace(code, tag) - return utf8(ensure_closed_tag(content)) + return ensure_closed_tag(content) def diff(self): if self.new_content == self.old_content: @@ -93,12 +93,12 @@ def _diff(self, old_content, new_content): for (op, data) in diffs: text = self._recover(data) if op == self.INSERT: - html.append("{}".format(text)) + html.append(u"{}".format(text)) elif op == self.DELETE: - html.append("{}".format(text)) + html.append(u"{}".format(text)) elif op == self.EQUAL: html.append(text) - return "".join(html) + return utf8(u"".join(html)) _TO_UNICODE_TYPES = (unicode_type, type(None)) @@ -127,7 +127,7 @@ def ensure_closed_tag(html): try: element = fromstring(html) except etree.ParserError as e: - logging.warn('fromstring error: {}, use fragment_fromstring'.format(e)) + logging.warning('fromstring error: {}, use fragment_fromstring'.format(e)) element = fragment_fromstring(html, create_parent='div') return to_unicode(tostring(element, encoding='utf-8'))