Skip to content

Commit

Permalink
Avoid rendering, then parsing diff.
Browse files Browse the repository at this point in the history
  • Loading branch information
jelmer committed Mar 22, 2017
1 parent be69325 commit c8300db
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 192 deletions.
238 changes: 74 additions & 164 deletions klaus/diff.py
Expand Up @@ -8,168 +8,78 @@
:copyright: 2007 by Armin Ronacher.
:license: BSD
"""
import re
from cgi import escape


def prepare_udiff(udiff, **kwargs):
    """Turn a unified diff text into the structure consumed by the template.

    Convenience wrapper: builds a :class:`DiffRenderer` for ``udiff`` and
    forwards any keyword arguments to its ``prepare`` method.
    """
    renderer = DiffRenderer(udiff)
    return renderer.prepare(**kwargs)


class DiffRenderer(object):
    """Parse a unified diff into per-file, per-chunk line records.

    The records carry line numbers, the kind of change and the (HTML-escaped)
    line text, with inline ``<ins>``/``<del>`` markers added for adjacent
    changed lines.
    """
    # Matches a hunk header such as "@@ -1,4 +1,6 @@"; both counts are optional.
    _chunk_re = re.compile(r'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@')

    def __init__(self, udiff):
        """:param udiff: a text in udiff format"""
        # Every line is passed through ``escape`` up front, so all later
        # processing works on already-escaped text.
        self.lines = [escape(line) for line in udiff.splitlines()]

    def _extract_filename(self, line):
        """
        Extract file name from unified diff line:
            --- a/foo/bar   ==>   foo/bar
            +++ b/foo/bar   ==>   foo/bar

        For ``/dev/null`` lines only the four-character marker is removed,
        so the result is ``/dev/null``.
        """
        if line.startswith(("--- /dev/null", "+++ /dev/null")):
            return line[len("--- "):]
        else:
            # "--- a/" and "+++ b/" have the same length, so one slice
            # handles both the old and the new file line.
            return line[len("--- a/"):]

    def _highlight_line(self, line, next):
        """Highlight inline changes in both lines.

        ``line`` and ``next`` are line records (dicts with ``'line'`` and
        ``'action'`` keys).  The part between their common prefix and common
        suffix is wrapped in ``<ins>`` (for ``'add'``) or ``<del>``
        (otherwise).  Both records are modified in place.
        """
        # Length of the common prefix.
        start = 0
        limit = min(len(line['line']), len(next['line']))
        while start < limit and line['line'][start] == next['line'][start]:
            start += 1
        # Negative offset of the common suffix; the remaining budget is
        # reduced by ``start`` so the suffix cannot overlap the prefix.
        end = -1
        limit -= start
        while -end <= limit and line['line'][end] == next['line'][end]:
            end -= 1
        end += 1
        if start or end:
            def do(l):
                # Convert the negative suffix offset to an absolute index.
                last = end + len(l['line'])
                if l['action'] == 'add':
                    tag = 'ins'
                else:
                    tag = 'del'
                l['line'] = u'%s<%s>%s</%s>%s' % (
                    l['line'][:start],
                    tag,
                    l['line'][start:last],
                    tag,
                    l['line'][last:]
                )
            do(line)
            do(next)

    def prepare(self, want_header=True):
        """Parse the diff and return data for the template.

        :param want_header: if true, text preceding a file's ``---`` line is
            emitted as an ``{'is_header': True, 'lines': [...]}`` entry.
        :return: a list of dicts.  File entries have ``is_header`` False and
            the keys ``old_filename``, ``new_filename``, ``additions``,
            ``deletions`` and ``chunks``; each chunk is a list of line
            records with ``old_lineno``, ``new_lineno``, ``action``
            (``'add'``, ``'del'`` or ``'unmod'``), ``line`` and
            ``no_newline``.
        """
        in_header = True
        header = []
        lineiter = iter(self.lines)
        files = []
        # Running out of input anywhere below raises StopIteration, which is
        # how the parsing loop terminates.
        try:
            line = next(lineiter)
            while 1:
                # continue until we found the old file
                if not line.startswith('--- '):
                    if in_header:
                        header.append(line)
                    line = next(lineiter)
                    continue

                # A header is only kept when every collected line is
                # non-blank.
                if header and all(x.strip() for x in header):
                    if want_header:
                        files.append({'is_header': True, 'lines': header})
                    header = []

                in_header = False
                chunks = []
                # ``line`` is the "--- " line; the next input line is taken
                # as the new-file line.
                files.append({
                    'is_header': False,
                    'old_filename': self._extract_filename(line),
                    'new_filename': self._extract_filename(next(lineiter)),
                    'additions': 0,
                    'deletions': 0,
                    'chunks': chunks
                })

                line = next(lineiter)
                while line:
                    match = self._chunk_re.match(line)
                    if not match:
                        # Not a hunk header: go back to scanning for the
                        # next file, collecting header lines again.
                        in_header = True
                        break

                    lines = []
                    chunks.append(lines)

                    # An omitted count in the hunk header defaults to 1.
                    old_line, old_end, new_line, new_end = \
                        [int(x or 1) for x in match.groups()]
                    # Counters are incremented before use below, so start
                    # one short of the first line number.
                    old_line -= 1
                    new_line -= 1
                    old_end += old_line
                    new_end += new_line
                    line = next(lineiter)

                    while old_line < old_end or new_line < new_end:
                        if line:
                            command, line = line[0], line[1:]
                        else:
                            # An empty line is treated as unmodified context.
                            command = ' '
                        affects_old = affects_new = False

                        if command == '+':
                            affects_new = True
                            action = 'add'
                            files[-1]['additions'] += 1
                        elif command == '-':
                            affects_old = True
                            action = 'del'
                            files[-1]['deletions'] += 1
                        else:
                            affects_old = affects_new = True
                            action = 'unmod'

                        old_line += affects_old
                        new_line += affects_new
                        # The side a line does not belong to gets an empty
                        # string instead of a number.
                        lines.append({
                            'old_lineno': affects_old and old_line or u'',
                            'new_lineno': affects_new and new_line or u'',
                            'action': action,
                            'line': line,
                            'no_newline': False,
                        })

                        # Skip "no newline at end of file" markers
                        line = next(lineiter)
                        if line == r"\ No newline at end of file":
                            lines[-1]['no_newline'] = True
                            line = next(lineiter)

        except StopIteration:
            pass

        # highlight inline changes
        for file in files:
            if file['is_header']:
                continue
            for chunk in file['chunks']:
                lineiter = iter(chunk)
                try:
                    while True:
                        line = next(lineiter)
                        if line['action'] != 'unmod':
                            # Pair a changed line with the record that
                            # follows it; the follower is consumed even when
                            # the pair is not highlighted.
                            nextline = next(lineiter)
                            if nextline['action'] == 'unmod' or \
                               nextline['action'] == line['action']:
                                continue
                            self._highlight_line(line, nextline)
                except StopIteration:
                    pass

        return files
from difflib import SequenceMatcher

def highlight_line(old_line, new_line):
    """Highlight inline changes in both lines.

    The common prefix and common suffix of the two lines are left untouched;
    whatever lies between them is wrapped in ``<del>...</del>`` in the old
    line and ``<ins>...</ins>`` in the new line.

    :param old_line: the removed line, as bytes
    :param new_line: the added line, as bytes
    :return: ``(old_line, new_line)``; returned unchanged when the lines
        share neither a prefix nor a suffix.

    NOTE(review): the line content is wrapped in markup without being
    HTML-escaped here -- confirm that escaping happens elsewhere.
    """
    # Length of the common prefix.
    start = 0
    limit = min(len(old_line), len(new_line))
    while start < limit and old_line[start] == new_line[start]:
        start += 1

    # Negative offset of the common suffix.  The budget is reduced by
    # ``start`` so that the suffix can never overlap the prefix.
    end = -1
    limit -= start
    while -end <= limit and old_line[end] == new_line[end]:
        end -= 1
    end += 1

    if not (start or end):
        return old_line, new_line

    def wrap(line, tag):
        # Convert the negative suffix offset to an absolute index.
        last = end + len(line)
        return b'%s<%s>%s</%s>%s' % (
            line[:start],
            tag,
            line[start:last],
            tag,
            line[last:]
        )

    # The tags must be bytes: on Python 3 a bytes template rejects str
    # arguments for %s with a TypeError.
    return wrap(old_line, b'del'), wrap(new_line, b'ins')


def render_diff(a, b, n=3):
    """Diff two sequences of lines and return data for the template.

    :param a: lines of the old version (expected to be bytes, each normally
        ending in ``b'\\n'``)
    :param b: lines of the new version
    :param n: number of unchanged context lines kept around each change
    :return: ``(additions, deletions, chunks)``.  ``chunks`` is a list with
        one entry per group of nearby changes; each entry is a list of dicts
        with the keys ``old_lineno``, ``new_lineno``, ``action`` (``'add'``,
        ``'del'`` or ``'unmod'``), ``line`` and ``no_newline``.  Line numbers
        are the zero-based indices reported by ``SequenceMatcher``; the side
        a line does not belong to is ``None``.
    :raises AssertionError: if ``SequenceMatcher`` yields an unknown tag.

    NOTE(review): line numbers are zero-based here -- confirm whether the
    template expects one-based numbers.
    """
    # Counters live in a dict so the nested helper can update them; a plain
    # ``additions += 1`` inside the helper would rebind a local name and
    # raise UnboundLocalError.
    counts = {'add': 0, 'del': 0}
    chunks = []

    def add_line(lines, old_lineno, new_lineno, action, line):
        if action in counts:
            counts[action] += 1
        lines.append({
            'old_lineno': old_lineno,
            'new_lineno': new_lineno,
            'action': action,
            'line': line,
            'no_newline': (line[-1:] != b'\n')
        })

    for group in SequenceMatcher(None, a, b).get_grouped_opcodes(n):
        lines = []
        chunks.append(lines)
        for tag, i1, i2, j1, j2 in group:
            if tag == 'equal':
                for c, line in enumerate(a[i1:i2]):
                    add_line(lines, i1+c, j1+c, 'unmod', line)
            elif tag == 'insert':
                for c, line in enumerate(b[j1:j2]):
                    add_line(lines, None, j1+c, 'add', line)
            elif tag == 'delete':
                for c, line in enumerate(a[i1:i2]):
                    add_line(lines, i1+c, None, 'del', line)
            elif tag == 'replace':
                # TODO: not sure if this is the best way to deal with replace
                # blocks, but it's consistent with the previous version.
                # Only the last removed line and the first added line are
                # compared for inline highlighting.
                for c, line in enumerate(a[i1:i2-1]):
                    add_line(lines, i1+c, None, 'del', line)
                old_line, new_line = highlight_line(a[i2-1], b[j1])
                add_line(lines, i2-1, None, 'del', old_line)
                add_line(lines, None, j1, 'add', new_line)
                for c, line in enumerate(b[j1+1:j2]):
                    add_line(lines, None, j1+c+1, 'add', line)
            else:
                raise AssertionError('unknown tag %s' % tag)

    return counts['add'], counts['del'], chunks
52 changes: 24 additions & 28 deletions klaus/repo.py
Expand Up @@ -3,11 +3,12 @@
import stat

from dulwich.object_store import tree_lookup_path
from dulwich.objects import Blob
from dulwich.errors import NotTreeError
import dulwich, dulwich.patch

from klaus.utils import check_output, force_unicode, parent_directory, encode_for_git, decode_from_git
from klaus.diff import prepare_udiff
from klaus.diff import render_diff


class FancyRepo(dulwich.repo.Repo):
Expand Down Expand Up @@ -192,43 +193,38 @@ def commit_diff(self, commit):
dulwich_changes = self.object_store.tree_changes(parent_tree, commit.tree)
for (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) in dulwich_changes:
summary['nfiles'] += 1

try:
# Check for binary files -- can't show diffs for these
if newsha and guess_is_binary(self[newsha]) or \
oldsha and guess_is_binary(self[oldsha]):
file_changes.append({
'is_binary': True,
'old_filename': oldpath or '/dev/null',
'new_filename': newpath or '/dev/null',
'chunks': None
})
continue
oldblob = self.object_store[oldsha] if oldsha else Blob.from_string(b'')
newblob = self.object_store[newsha] if newsha else Blob.from_string(b'')
except KeyError:
# newsha/oldsha are probably related to submodules.
# Dulwich will handle that.
pass

bytesio = io.BytesIO()
dulwich.patch.write_object_diff(bytesio, self.object_store,
(oldpath, oldmode, oldsha),
(newpath, newmode, newsha))
files = prepare_udiff(decode_from_git(bytesio.getvalue()), want_header=False)
if not files:
# the diff module doesn't handle deletions/additions
# of empty files correctly.
# Check for binary files -- can't show diffs for these
if guess_is_binary(newblob) or \
guess_is_binary(oldblob):
file_changes.append({
'is_binary': True,
'old_filename': oldpath or '/dev/null',
'new_filename': newpath or '/dev/null',
'chunks': [],
'additions': 0,
'deletions': 0,
'chunks': None
})
else:
change = files[0]
summary['nadditions'] += change['additions']
summary['ndeletions'] += change['deletions']
file_changes.append(change)
continue

additions, deletions, chunks = render_diff(
oldblob.splitlines(), newblob.splitlines())
change = {
'is_binary': False,
'old_filename': oldpath or '/dev/null',
'new_filename': newpath or '/dev/null',
'chunks': chunks,
'additions': additions,
'deletions': deletions,
}
summary['nadditions'] += additions
summary['ndeletions'] += deletions
file_changes.append(change)

return summary, file_changes

Expand Down

0 comments on commit c8300db

Please sign in to comment.