Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
language: python
python:
- '2.7'
- '3.5'
- '3.6'
sudo: false
install:
python setup.py install
Expand Down
54 changes: 31 additions & 23 deletions rich_text_diff/__init__.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
# -*- coding: utf-8 -*-
import copy
import sys
import logging
import re
import urllib

from bidict import bidict
import diff_match_patch as dmp_module
from lxml.html import fromstring, tostring, fragment_fromstring
from lxml import etree

if sys.version_info < (3,):
    # Python 2: expose the unicode-aware builtins under version-neutral names.
    chr = unichr
    unicode_type = unicode
else:
    unicode_type = str

# Pool of single-character placeholder strings covering U+E000..U+FFFF.
# It is built with ``chr`` (aliased to ``unichr`` on Python 2 above) so the
# same expression works on both major versions.
# NOTE(review): only U+E000..U+F8FF is the Unicode Private Use Area;
# U+F900..U+FFFF contains assigned characters (e.g. fullwidth forms), so the
# upper part of this range is not guaranteed to be absent from real text --
# confirm that placeholders cannot collide with document content.
UNICODE_KEY = [chr(item) for item in range(0xE000, 0xFFFF + 1)]
# Module-level diff_match_patch instance, created once at import time.
DMP = dmp_module.diff_match_patch()

Expand Down Expand Up @@ -45,31 +50,31 @@ def _map_tag(self, content):
self.tag_map[self.code_key.pop()] = tag

def _map_media_tag(self, element, raw_tag):
    """Assign a placeholder code to a media tag, keyed by its attributes.

    Tags whose attributes produce the same key share one code: the new
    ``raw_tag`` is appended to the list already stored for that code.
    Otherwise a fresh code is popped from ``self.code_key`` and registered
    in both ``self.tag_map`` and ``self.media_url``.

    :param element: parsed element; only its ``attrib`` mapping is read.
    :param raw_tag: the tag's original markup.
    """
    tag_key = gen_tag_key(element.attrib)
    # Look the key up through the inverse mapping instead of scanning
    # ``values()``; presumably ``media_url`` is a bidict -- verify.
    code = self.media_url.inv.get(tag_key)
    if code is not None:
        self.tag_map[code].append(raw_tag)
        return
    code = self.code_key.pop()
    self.tag_map[code] = [raw_tag]
    self.media_url[code] = tag_key

def _replace(self, new_content, old_content):
    """Substitute every mapped tag in both documents with its code.

    ``self._map_tag(new_content)`` runs first; it is expected to populate
    ``self.tag_map`` (its body is not fully visible here -- confirm).
    Each ``tag_map`` value is either a single tag string or a list of tag
    strings that share one code.

    :param new_content: the newer document text.
    :param old_content: the older document text.
    :return: ``(new_content, old_content)`` after substitution, both passed
        through ``to_unicode``.
    """
    self._map_tag(new_content)
    # ``items()`` exists on both Python 2 and 3 (``iteritems()`` does not).
    # Both documents are rewritten in one pass; each still sees the same
    # sequence of replacements as with two separate loops.
    for code, tag in self.tag_map.items():
        tags = tag if isinstance(tag, list) else [tag]
        for item in tags:
            new_content = new_content.replace(item, code)
            old_content = old_content.replace(item, code)
    return to_unicode(new_content), to_unicode(old_content)

def _recover(self, content):
for code, tag in self.tag_map.iteritems():
for code, tag in self.tag_map.items():
if isinstance(tag, list):
tag = tag[0]
content = content.replace(code, tag)
Expand All @@ -88,15 +93,16 @@ def _diff(self, old_content, new_content):
for (op, data) in diffs:
text = self._recover(data)
if op == self.INSERT:
html.append("<ins style=\"background:#e6ffe6;\">{}</ins>".format(text))
html.append(u"<ins style=\"background:#e6ffe6;\">{}</ins>".format(text))
elif op == self.DELETE:
html.append("<del style=\"background:#ffe6e6;\">{}</del>".format(text))
html.append(u"<del style=\"background:#ffe6e6;\">{}</del>".format(text))
elif op == self.EQUAL:
html.append(text)
return "".join(html)
return utf8(u"".join(html))


# Type tuples used for isinstance() checks by the text/bytes helpers.
# ``unicode_type`` is the version-neutral alias (``unicode`` on Python 2,
# ``str`` on Python 3); the bare name ``unicode`` does not exist on Python 3.
_TO_UNICODE_TYPES = (unicode_type, type(None))
# Values of these types are returned unchanged by ``utf8``.
_UTF8_TYPES = (bytes, type(None))


def to_unicode(value):
Expand All @@ -109,21 +115,23 @@ def to_unicode(value):
return value.decode("utf-8")


def utf8(value):
    """Return ``value`` as a UTF-8 encoded byte string.

    :param value: bytes, unicode text, or ``None``.
    :return: ``value`` unchanged when it is already bytes or ``None``;
        otherwise the text encoded as UTF-8.
    :raises TypeError: if ``value`` is neither bytes, unicode text nor
        ``None``.
    """
    if isinstance(value, _UTF8_TYPES):
        return value
    if not isinstance(value, unicode_type):
        raise TypeError("Expected bytes, unicode, or None; got %r" % type(value))
    return value.encode("utf-8")


def ensure_closed_tag(html):
    """Parse ``html`` with lxml and return it re-serialised as unicode text.

    The input is parsed with ``fromstring``. If that raises
    ``etree.ParserError``, a warning is logged and the input is parsed with
    ``fragment_fromstring`` wrapped in a ``div`` parent instead.
    Re-serialising through lxml is presumably what closes any unclosed
    tags -- confirm against callers.

    :param html: markup to normalise.
    :return: the serialised element, decoded via ``to_unicode``.
    """
    try:
        element = fromstring(html)
    except etree.ParserError as e:
        # ``logging.warning`` replaces the deprecated ``logging.warn`` alias;
        # lazy %-formatting leaves message rendering to the logging module.
        logging.warning('fromstring error: %s, use fragment_fromstring', e)
        element = fragment_fromstring(html, create_parent='div')
    # ``tostring(..., encoding='utf-8')`` yields bytes; convert to text so
    # the result type is the same on Python 2 and 3.
    return to_unicode(tostring(element, encoding='utf-8'))


def urlencode(query):
l = []
for k, v in query.items():
if isinstance(v, unicode):
v = v.encode('utf-8')
k = urllib.quote_plus(str(k))
v = urllib.quote_plus(str(v))
l.append(k + '=' + v)
def gen_tag_key(query):
    """Build a text key from a mapping of tag attributes.

    Each item is rendered as ``name=value`` (both sides passed through
    ``to_unicode``) and the items are joined with ``&`` in the mapping's
    iteration order.

    :param query: mapping of attribute names to values.
    :return: the joined key as unicode text.
    """
    # The template must be a unicode literal: on Python 2 a byte-string
    # template would ASCII-encode its arguments and raise UnicodeEncodeError
    # for non-ASCII attribute text.
    pairs = [u"{}={}".format(to_unicode(k), to_unicode(v)) for k, v in query.items()]
    return u'&'.join(pairs)
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@

setup(
name='rich_text_diff',
version='0.0.5',
version='0.0.6',
author='liukai',
author_email='liukai@zhihu.com',
description='support rich text diff',
packages=['rich_text_diff'],
test_suite='nose.collector',
python_requires='>=2.7',
python_requires='>=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*',
long_description='readme',
long_description_content_type='text/markdown',
url='https://github.com/c1ay/rich_text_diff',
Expand Down