Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Documentation/config/diff.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,16 @@ endif::git-diff[]
Set this option to `true` to make the diff driver cache the text
conversion outputs. See linkgit:gitattributes[5] for details.

`diff.<driver>.process`::
The command to run as a long-running diff process.
The tool communicates via the pkt-line protocol and returns
hunks that are fed into Git's diff and blame pipelines.
If the tool returns zero hunks, the file is treated as
unchanged for both diff output and blame attribution.
Git provides `git diff-process-normalize` as a built-in
tool that detects whitespace-only changes.
See linkgit:gitattributes[5] for details.

`diff.indentHeuristic`::
Set this option to `false` to disable the default heuristics
that shift diff hunk boundaries to make patches easier to read.
Expand Down
58 changes: 58 additions & 0 deletions Documentation/gitattributes.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -821,6 +821,64 @@ NOTE: If `diff.<name>.command` is defined for path with the
(see above), and adding `diff.<name>.algorithm` has no effect, as the
algorithm is not passed to the external diff driver.

Using an external diff process
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

An external tool can provide content-aware line matching by
setting `diff.<name>.process` to the command that runs
the tool. The tool is a long-running process that communicates via
the pkt-line protocol (see
linkgit:gitprotocol-long-running-process[5]).

------------------------
*.c diff=cdiff
------------------------

----------------------------------------------------------------
[diff "cdiff"]
process = /path/to/diff-process-tool
----------------------------------------------------------------

The tool receives file pairs and returns hunk descriptors indicating
which lines changed. Git feeds these hunks into its standard diff
pipeline, so all output features (word diff, function context,
color) work normally.

If the tool fails or returns an error, Git silently falls back to
the builtin diff algorithm. If the tool returns invalid hunks
(out of bounds, overlapping), Git also falls back silently.

The handshake negotiates `version=1` and `capability=hunks`.
Per-file requests send `command=hunks` and `pathname=<path>`,
followed by the old and new file content as packetized data.
The tool responds with lines of the form
`hunk <old_start> <old_count> <new_start> <new_count>`
(1-based line numbers), a flush packet, and `status=success`.

If the tool returns zero hunks with `status=success`, Git treats
the file as having no changes and produces no diff output.
`git blame` also consults the diff process and skips commits
where it reports zero hunks, attributing lines to earlier commits
instead.

Git ships with a built-in diff process, `git diff-process-normalize`,
that detects whitespace-only changes. Files whose only differences
are whitespace produce zero hunks; files with non-whitespace changes
fall back to the builtin diff algorithm. To use it:

----------------------------------------------------------------
[diff "cdiff"]
process = git diff-process-normalize
----------------------------------------------------------------

This is useful after running a code formatter: `git diff` shows
no output for files that only had whitespace changes, and
`git blame` skips whitespace-only commits automatically without
requiring a `.git-blame-ignore-revs` file.

Tools should ignore unknown keys in the per-file request to
remain forward-compatible.

Defining a custom hunk-header
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Expand Down
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -1142,6 +1142,7 @@ LIB_OBJS += diff-delta.o
LIB_OBJS += diff-merges.o
LIB_OBJS += diff-lib.o
LIB_OBJS += diff-no-index.o
LIB_OBJS += diff-process.o
LIB_OBJS += diff.o
LIB_OBJS += diffcore-break.o
LIB_OBJS += diffcore-delta.o
Expand Down Expand Up @@ -1408,6 +1409,7 @@ BUILTIN_OBJS += builtin/diagnose.o
BUILTIN_OBJS += builtin/diff-files.o
BUILTIN_OBJS += builtin/diff-index.o
BUILTIN_OBJS += builtin/diff-pairs.o
BUILTIN_OBJS += builtin/diff-process-normalize.o
BUILTIN_OBJS += builtin/diff-tree.o
BUILTIN_OBJS += builtin/diff.o
BUILTIN_OBJS += builtin/difftool.o
Expand Down
43 changes: 39 additions & 4 deletions blame.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#include "tag.h"
#include "trace2.h"
#include "blame.h"
#include "diff-process.h"
#include "userdiff.h"
#include "alloc.h"
#include "commit-slab.h"
#include "bloom.h"
Expand Down Expand Up @@ -315,16 +317,47 @@ static struct commit *fake_working_tree_commit(struct repository *r,


/*
 * Diff file_a against file_b with xdi_diff(), reporting each hunk to
 * hunk_func with cb_data as the callback's private data.
 *
 * When both path and istate are non-NULL and the userdiff driver found
 * for path has a process configured, that diff process is asked for
 * hunks first:
 *  - it succeeds with zero hunks: return 0 without calling xdi_diff();
 *  - it succeeds with hunks: they are passed to xdi_diff() through
 *    xpp.external_hunks / xpp.external_hunks_nr;
 *  - it returns non-zero: xdi_diff() runs without external hunks.
 *
 * NOTE(review): this listing looks like a rendered diff without +/-
 * markers; the two-line signature ending in "int xdl_opts)" and the
 * first "return xdi_diff(...)" appear to be the removed pre-change
 * lines -- confirm against the actual file.
 */
static int diff_hunks(mmfile_t *file_a, mmfile_t *file_b,
xdl_emit_hunk_consume_func_t hunk_func, void *cb_data, int xdl_opts)
xdl_emit_hunk_consume_func_t hunk_func, void *cb_data,
int xdl_opts, struct index_state *istate,
const char *path)
{
xpparam_t xpp = {0};
xdemitconf_t xecfg = {0};
xdemitcb_t ecb = {NULL};
/* Hunks supplied by the diff process; freed after xdi_diff() below. */
struct xdl_hunk *ext_hunks = NULL;
int ret;

xpp.flags = xdl_opts;
xecfg.hunk_func = hunk_func;
ecb.priv = cb_data;
return xdi_diff(file_a, file_b, &xpp, &xecfg, &ecb);

/* Callers that pass NULL for path or istate get the plain xdiff path. */
if (path && istate) {
struct userdiff_driver *drv;
drv = userdiff_find_by_path(istate, path);
if (drv && drv->process) {
size_t nr = 0;
/* A zero return is treated as success here. */
if (!diff_process_get_hunks(drv, path,
file_a->ptr, file_a->size,
file_b->ptr, file_b->size,
&ext_hunks, &nr)) {
if (!nr) {
/*
* Zero hunks: the diff process
* considers these files equivalent.
* Skip so blame looks past this
* commit.
*/
/*
* NOTE(review): ext_hunks is not freed on this path;
* confirm diff_process_get_hunks() leaves it NULL
* when it reports nr == 0.
*/
return 0;
}
xpp.external_hunks = ext_hunks;
xpp.external_hunks_nr = nr;
}
}
}

ret = xdi_diff(file_a, file_b, &xpp, &xecfg, &ecb);
free(ext_hunks);
return ret;
}

static const char *get_next_line(const char *start, const char *end)
Expand Down Expand Up @@ -1961,7 +1994,8 @@ static void pass_blame_to_parent(struct blame_scoreboard *sb,
&sb->num_read_blob, ignore_diffs);
sb->num_get_patch++;

if (diff_hunks(&file_p, &file_o, blame_chunk_cb, &d, sb->xdl_opts))
if (diff_hunks(&file_p, &file_o, blame_chunk_cb, &d, sb->xdl_opts,
sb->revs->diffopt.repo->index, target->path))
die("unable to generate diff (%s -> %s)",
oid_to_hex(&parent->commit->object.oid),
oid_to_hex(&target->commit->object.oid));
Expand Down Expand Up @@ -2114,7 +2148,8 @@ static void find_copy_in_blob(struct blame_scoreboard *sb,
* file_p partially may match that image.
*/
memset(split, 0, sizeof(struct blame_entry [3]));
if (diff_hunks(file_p, &file_o, handle_split_cb, &d, sb->xdl_opts))
if (diff_hunks(file_p, &file_o, handle_split_cb, &d, sb->xdl_opts,
NULL, NULL))
die("unable to generate diff (%s)",
oid_to_hex(&parent->commit->object.oid));
/* remainder, if any, all match the preimage */
Expand Down
1 change: 1 addition & 0 deletions builtin.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ int cmd_diff_files(int argc, const char **argv, const char *prefix, struct repos
int cmd_diff_index(int argc, const char **argv, const char *prefix, struct repository *repo);
int cmd_diff(int argc, const char **argv, const char *prefix, struct repository *repo);
int cmd_diff_pairs(int argc, const char **argv, const char *prefix, struct repository *repo);
int cmd_diff_process_normalize(int argc, const char **argv, const char *prefix, struct repository *repo);
int cmd_diff_tree(int argc, const char **argv, const char *prefix, struct repository *repo);
int cmd_difftool(int argc, const char **argv, const char *prefix, struct repository *repo);
int cmd_env__helper(int argc, const char **argv, const char *prefix, struct repository *repo);
Expand Down
143 changes: 143 additions & 0 deletions builtin/diff-process-normalize.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
/*
* Built-in diff process that returns zero hunks for files whose
* only differences are whitespace, and status=error otherwise.
* See diff-process.c for the protocol and gitattributes(5) for usage.
*
* Uses xdiff_compare_lines() with XDF_IGNORE_WHITESPACE to compare
* lines, giving the same whitespace handling as "git diff -w".
*/

#include "builtin.h"
#include "pkt-line.h"
#include "strbuf.h"
#include "xdiff-interface.h"

/*
 * Fetch one pkt-line from fd.
 *
 * On a data packet the payload replaces the contents of line, with
 * trailing whitespace stripped, and 1 is returned.  A packet without
 * payload (flush) yields 0 and leaves line untouched.  EOF yields -1.
 */
static int read_pkt(int fd, struct strbuf *line)
{
	char *payload;
	int payload_len;

	if (packet_read_line_gently(fd, &payload_len, &payload) < 0)
		return -1;

	if (payload && payload_len) {
		strbuf_reset(line);
		strbuf_add(line, payload, payload_len);
		strbuf_rtrim(line);
		return 1;
	}

	return 0; /* flush */
}

/*
* Read packetized content until a flush packet.
*/
static int read_content(int fd, struct strbuf *out)
{
strbuf_reset(out);
if (read_packetized_to_strbuf(fd, out, PACKET_READ_GENTLE_ON_EOF) < 0)
return -1;
return 0;
}

/*
 * Walk both buffers one line at a time and compare each pair of lines
 * with xdiff_compare_lines() under XDF_IGNORE_WHITESPACE (the same
 * logic as "git diff -w").  The newline itself is not part of the
 * compared line.
 *
 * Returns 1 when every line pair matches and both buffers run out
 * together, 0 otherwise.
 */
static int whitespace_equivalent(const char *a, long size_a,
				 const char *b, long size_b)
{
	const char *const end_a = a + size_a;
	const char *const end_b = b + size_b;

	for (;;) {
		const char *nl_a, *nl_b;
		long width_a, width_b;

		if (a >= end_a || b >= end_b)
			break;

		nl_a = memchr(a, '\n', end_a - a);
		nl_b = memchr(b, '\n', end_b - b);
		width_a = nl_a ? nl_a - a : end_a - a;
		width_b = nl_b ? nl_b - b : end_b - b;

		if (!xdiff_compare_lines(a, width_a, b, width_b,
					 XDF_IGNORE_WHITESPACE))
			return 0;

		/* Step over the line and its newline, if it had one. */
		a = nl_a ? nl_a + 1 : end_a;
		b = nl_b ? nl_b + 1 : end_b;
	}

	/* A leftover line on either side means the files differ. */
	return a >= end_a && b >= end_b;
}

/*
 * Entry point of "git diff-process-normalize": a long-running diff
 * process speaking pkt-line on stdin (fd 0) and stdout (fd 1).
 *
 * Handshake:
 *   client: "git-diff-client", one or more "version=<n>" lines, flush
 *   server: "git-diff-server", "version=1", flush
 *   client: capability lines, flush
 *   server: "capability=hunks", flush
 *
 * Afterwards, for every request (header lines up to a flush, followed
 * by the packetized old and new contents) the reply is an empty hunk
 * list followed by "status=success" when the two files differ only in
 * whitespace, or "status=error" otherwise.
 *
 * Returns 1 when the handshake fails (unexpected greeting, version 1
 * not offered, or EOF before the handshake completes) and 0 once the
 * client closes the connection or sends a request without a
 * "command=" header.
 */
int cmd_diff_process_normalize(int argc UNUSED, const char **argv UNUSED,
			       const char *prefix UNUSED,
			       struct repository *repo UNUSED)
{
	struct strbuf line = STRBUF_INIT;
	struct strbuf old_content = STRBUF_INIT;
	struct strbuf new_content = STRBUF_INIT;
	int have_version = 0;
	int result = 1;
	int ret;

	/* Handshake: read client greeting */
	ret = read_pkt(0, &line);
	if (ret <= 0 || strcmp(line.buf, "git-diff-client"))
		goto cleanup;

	/*
	 * The client may offer a list of versions before the flush;
	 * consume all of them so the stream stays in sync, and insist
	 * that version 1 is among them.
	 */
	while ((ret = read_pkt(0, &line)) > 0) {
		if (!strcmp(line.buf, "version=1"))
			have_version = 1;
	}
	if (ret < 0 || !have_version)
		goto cleanup;

	/* Send server greeting */
	packet_write_fmt(1, "git-diff-server\n");
	packet_write_fmt(1, "version=1\n");
	packet_flush(1);

	/* Read client capabilities until flush */
	while ((ret = read_pkt(0, &line)) > 0)
		; /* consume */
	if (ret < 0)
		goto cleanup; /* client went away mid-handshake */

	/* Send our capabilities */
	packet_write_fmt(1, "capability=hunks\n");
	packet_flush(1);

	/* Main loop: process file pairs */
	for (;;) {
		int have_command = 0;
		int equivalent;

		/*
		 * Read request headers until flush.  Only the presence
		 * of a "command=" key matters; other keys are ignored.
		 */
		while ((ret = read_pkt(0, &line)) > 0) {
			if (starts_with(line.buf, "command="))
				have_command = 1;
		}
		if (ret < 0)
			break; /* EOF: client closed connection */
		if (!have_command)
			break;

		/* Old file content, then new file content */
		if (read_content(0, &old_content) < 0)
			break;
		if (read_content(0, &new_content) < 0)
			break;

		equivalent = whitespace_equivalent(old_content.buf,
						   old_content.len,
						   new_content.buf,
						   new_content.len);

		/*
		 * The hunk list is always empty.  "success" reports the
		 * files as equivalent; "error" signals non-whitespace
		 * differences so the caller can fall back.
		 */
		packet_flush(1);
		packet_write_fmt(1, "status=%s\n",
				 equivalent ? "success" : "error");
		packet_flush(1);
	}
	result = 0;

cleanup:
	strbuf_release(&line);
	strbuf_release(&old_content);
	strbuf_release(&new_content);
	return result;
}
Loading
Loading