diff --git a/.travis.yml b/.travis.yml
index 2591be6..9e43467 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,6 +1,8 @@
language: python
python:
- '2.7'
+ - '3.5'
+ - '3.6'
sudo: false
install:
python setup.py install
diff --git a/rich_text_diff/__init__.py b/rich_text_diff/__init__.py
index 9427e5c..447b77f 100644
--- a/rich_text_diff/__init__.py
+++ b/rich_text_diff/__init__.py
@@ -1,16 +1,21 @@
# -*- coding: utf-8 -*-
import copy
+import sys
import logging
import re
-import urllib
from bidict import bidict
import diff_match_patch as dmp_module
from lxml.html import fromstring, tostring, fragment_fromstring
from lxml import etree
+if sys.version_info < (3,):
+ chr = unichr
+ unicode_type = unicode
+else:
+ unicode_type = str
-UNICODE_KEY = [unichr(item) for item in range(0xE000, 0xFFFF + 1)]
+UNICODE_KEY = [chr(item) for item in range(0xE000, 0xFFFF + 1)]
# unicode spec not in use
DMP = dmp_module.diff_match_patch()
@@ -45,23 +50,23 @@ def _map_tag(self, content):
self.tag_map[self.code_key.pop()] = tag
def _map_media_tag(self, element, raw_tag):
- url = urlencode(element.attrib)
- if url in self.media_url.values():
- code = self.media_url.inv[url]
+ tag_key = gen_tag_key(element.attrib)
+ if tag_key in self.media_url.values():
+ code = self.media_url.inv[tag_key]
self.tag_map[code].append(raw_tag)
return
code = self.code_key.pop()
self.tag_map[code] = [raw_tag]
- self.media_url[code] = url
+ self.media_url[code] = tag_key
def _replace(self, new_content, old_content):
self._map_tag(new_content)
- for code, tag in self.tag_map.iteritems():
+ for code, tag in self.tag_map.items():
if not isinstance(tag, list):
tag = [tag]
for item in tag:
new_content = new_content.replace(item, code)
- for code, tag in self.tag_map.iteritems():
+ for code, tag in self.tag_map.items():
if not isinstance(tag, list):
tag = [tag]
for item in tag:
@@ -69,7 +74,7 @@ def _replace(self, new_content, old_content):
return to_unicode(new_content), to_unicode(old_content)
def _recover(self, content):
- for code, tag in self.tag_map.iteritems():
+ for code, tag in self.tag_map.items():
if isinstance(tag, list):
tag = tag[0]
content = content.replace(code, tag)
@@ -88,15 +93,16 @@ def _diff(self, old_content, new_content):
for (op, data) in diffs:
text = self._recover(data)
if op == self.INSERT:
- html.append("{}".format(text))
+ html.append(u"{}".format(text))
elif op == self.DELETE:
- html.append("{}".format(text))
+ html.append(u"{}".format(text))
elif op == self.EQUAL:
html.append(text)
- return "".join(html)
+ return utf8(u"".join(html))
-_TO_UNICODE_TYPES = (unicode, type(None))
+_TO_UNICODE_TYPES = (unicode_type, type(None))
+_UTF8_TYPES = (bytes, type(None))
def to_unicode(value):
@@ -109,21 +115,23 @@ def to_unicode(value):
return value.decode("utf-8")
+def utf8(value):
+    """Return ``value`` as a UTF-8 encoded byte string.
+
+    ``bytes`` and ``None`` are returned unchanged, text (``unicode_type``)
+    is encoded as UTF-8, and any other type raises ``TypeError``.
+    """
+    if value is None or isinstance(value, bytes):
+        return value
+    if isinstance(value, unicode_type):
+        return value.encode("utf-8")
+    raise TypeError("Expected bytes, unicode, or None; got %r" % type(value))
+
+
def ensure_closed_tag(html):
try:
element = fromstring(html)
except etree.ParserError as e:
- logging.warn('fromstring error: {}, use fragment_fromstring'.format(e))
+ logging.warning('fromstring error: {}, use fragment_fromstring'.format(e))
element = fragment_fromstring(html, create_parent='div')
- return tostring(element, encoding='utf-8')
+ return to_unicode(tostring(element, encoding='utf-8'))
-def urlencode(query):
- l = []
- for k, v in query.items():
- if isinstance(v, unicode):
- v = v.encode('utf-8')
- k = urllib.quote_plus(str(k))
- v = urllib.quote_plus(str(v))
- l.append(k + '=' + v)
+def gen_tag_key(query):
+    """Build a key string from a tag's attribute mapping.
+
+    Every key and value is passed through ``to_unicode`` and rendered as
+    ``key=value``; the pairs are joined with ``&``.
+    """
+    # The template must be a unicode literal: on Python 2 a byte-string
+    # template raises UnicodeEncodeError as soon as an attribute holds
+    # non-ASCII text.
+    l = [u"{}={}".format(to_unicode(k), to_unicode(v)) for k, v in query.items()]
     return '&'.join(l)
diff --git a/setup.py b/setup.py
index ad47b7d..1e43542 100644
--- a/setup.py
+++ b/setup.py
@@ -3,13 +3,13 @@
setup(
name='rich_text_diff',
- version='0.0.5',
+ version='0.0.6',
author='liukai',
author_email='liukai@zhihu.com',
description='support rich text diff',
packages=['rich_text_diff'],
test_suite='nose.collector',
- python_requires='>=2.7',
+ python_requires='>=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*',
long_description='readme',
long_description_content_type='text/markdown',
url='https://github.com/c1ay/rich_text_diff',