Permalink
Browse files

[PATCH] Add -B flag to diff-* brothers.

A new diffcore transformation, diffcore-break.c, is introduced.

When the -B flag is given, a patch that represents a complete
rewrite is broken into a deletion followed by a creation.  This
makes it easier to review such a complete rewrite patch.

The -B flag takes the same syntax as the -M and -C flags to
specify the minimum amount of non-source material the resulting
file needs to have to be considered a complete rewrite, and
defaults to 99% if not specified.

As the new test t4008-diff-break-rewrite.sh demonstrates, if a
file is a complete rewrite, it is broken into a delete/create
pair, which can further be subjected to the usual rename
detection if -M or -C is used.  For example, if file0 is
completely rewritten so that it looks as if it were based on
file1, which itself disappeared, the following happens:

    The original change looks like this:

	file0     --> file0' (quite different from file0)
	file1     --> /dev/null

    After diffcore-break runs, it would become this:

	file0     --> /dev/null
	/dev/null --> file0'
	file1     --> /dev/null

    Then diffcore-rename matches them up:

	file1     --> file0'

The internal score values are finer grained now.  The earlier
maximum of 10000 has been raised to 60000; there are no
user-visible changes, but there is no reason to waste available bits.

Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
  • Loading branch information...
1 parent 2cd6888 commit f345b0a066572206aac4a4f9a57d746e213b6bff Junio C Hamano committed with Linus Torvalds May 30, 2005
Showing with 433 additions and 29 deletions.
  1. +4 −1 Documentation/git-diff-cache.txt
  2. +4 −1 Documentation/git-diff-files.txt
  3. +4 −1 Documentation/git-diff-tree.txt
  4. +2 −1 Makefile
  5. +9 −2 diff-cache.c
  6. +6 −2 diff-files.c
  7. +7 −1 diff-tree.c
  8. +18 −3 diff.c
  9. +4 −1 diff.h
  10. +127 −0 diffcore-break.c
  11. +32 −13 diffcore-rename.c
  12. +9 −3 diffcore.h
  13. +207 −0 t/t4008-diff-break-rewrite.sh
@@ -9,7 +9,7 @@ git-diff-cache - Compares content and mode of blobs between the cache and reposi
SYNOPSIS
--------
-'git-diff-cache' [-p] [-r] [-z] [-m] [-M] [-R] [-C] [-S<string>] [--pickaxe-all] [--cached] <tree-ish> [<path>...]
+'git-diff-cache' [-p] [-r] [-z] [-m] [-B] [-M] [-R] [-C] [-S<string>] [--pickaxe-all] [--cached] <tree-ish> [<path>...]
DESCRIPTION
-----------
@@ -35,6 +35,9 @@ OPTIONS
-z::
\0 line termination on output
+-B::
+ Break complete rewrite changes into pairs of delete and create.
+
-M::
Detect renames.
@@ -9,7 +9,7 @@ git-diff-files - Compares files in the working tree and the cache
SYNOPSIS
--------
-'git-diff-files' [-p] [-q] [-r] [-z] [-M] [-C] [-R] [-S<string>] [--pickaxe-all] [<pattern>...]
+'git-diff-files' [-p] [-q] [-r] [-z] [-B] [-M] [-C] [-R] [-S<string>] [--pickaxe-all] [<pattern>...]
DESCRIPTION
-----------
@@ -29,6 +29,9 @@ OPTIONS
-R::
Output diff in reverse.
+-B::
+ Break complete rewrite changes into pairs of delete and create.
+
-M::
Detect renames.
@@ -9,7 +9,7 @@ git-diff-tree - Compares the content and mode of blobs found via two tree object
SYNOPSIS
--------
-'git-diff-tree' [-p] [-r] [-z] [--stdin] [-M] [-R] [-C] [-S<string>] [--pickaxe-all] [-m] [-s] [-v] [-t] <tree-ish> <tree-ish> [<pattern>]\*
+'git-diff-tree' [-p] [-r] [-z] [--stdin] [-B] [-M] [-R] [-C] [-S<string>] [--pickaxe-all] [-m] [-s] [-v] [-t] <tree-ish> <tree-ish> [<pattern>]\*
DESCRIPTION
-----------
@@ -33,6 +33,9 @@ OPTIONS
generate patch (see section on generating patches). For
git-diff-tree, this flag implies '-r' as well.
+-B::
+ Break complete rewrite changes into pairs of delete and create.
+
-M::
Detect renames.
View
@@ -48,7 +48,7 @@ LIB_OBJS += strbuf.o
LIB_H += diff.h count-delta.h
LIB_OBJS += diff.o diffcore-rename.o diffcore-pickaxe.o diffcore-pathspec.o \
- count-delta.o
+ count-delta.o diffcore-break.o
LIB_OBJS += gitenv.o
@@ -130,6 +130,7 @@ diff.o: $(LIB_H) diffcore.h
diffcore-rename.o : $(LIB_H) diffcore.h
diffcore-pathspec.o : $(LIB_H) diffcore.h
diffcore-pickaxe.o : $(LIB_H) diffcore.h
+diffcore-break.o : $(LIB_H) diffcore.h
test: all
$(MAKE) -C t/ all
View
@@ -9,6 +9,7 @@ static int diff_setup_opt = 0;
static int diff_score_opt = 0;
static const char *pickaxe = NULL;
static int pickaxe_opts = 0;
+static int diff_break_opt = -1;
/* A file entry went away or appeared */
static void show_file(const char *prefix, struct cache_entry *ce, unsigned char *sha1, unsigned int mode)
@@ -188,6 +189,10 @@ int main(int argc, const char **argv)
diff_output_format = DIFF_FORMAT_PATCH;
continue;
}
+ if (!strncmp(arg, "-B", 2)) {
+ diff_break_opt = diff_scoreopt_parse(arg);
+ continue;
+ }
if (!strncmp(arg, "-M", 2)) {
detect_rename = DIFF_DETECT_RENAME;
diff_score_opt = diff_scoreopt_parse(arg);
@@ -240,9 +245,11 @@ int main(int argc, const char **argv)
die("unable to read tree object %s", tree_name);
ret = diff_cache(active_cache, active_nr);
- diffcore_std(pathspec,
+
+ diffcore_std(pathspec ? : NULL,
detect_rename, diff_score_opt,
- pickaxe, pickaxe_opts);
+ pickaxe, pickaxe_opts,
+ diff_break_opt);
diff_flush(diff_output_format, 1);
return ret;
}
View
@@ -15,6 +15,7 @@ static int diff_setup_opt = 0;
static int diff_score_opt = 0;
static const char *pickaxe = NULL;
static int pickaxe_opts = 0;
+static int diff_break_opt = -1;
static int silent = 0;
static void show_unmerge(const char *path)
@@ -57,6 +58,8 @@ int main(int argc, const char **argv)
pickaxe = argv[1] + 2;
else if (!strcmp(argv[1], "--pickaxe-all"))
pickaxe_opts = DIFF_PICKAXE_ALL;
+ else if (!strncmp(argv[1], "-B", 2))
+ diff_break_opt = diff_scoreopt_parse(argv[1]);
else if (!strncmp(argv[1], "-M", 2)) {
diff_score_opt = diff_scoreopt_parse(argv[1]);
detect_rename = DIFF_DETECT_RENAME;
@@ -116,9 +119,10 @@ int main(int argc, const char **argv)
show_modified(oldmode, mode, ce->sha1, null_sha1,
ce->name);
}
- diffcore_std(argv + 1,
+ diffcore_std((1 < argc) ? argv + 1 : NULL,
detect_rename, diff_score_opt,
- pickaxe, pickaxe_opts);
+ pickaxe, pickaxe_opts,
+ diff_break_opt);
diff_flush(diff_output_format, 1);
return 0;
}
View
@@ -14,6 +14,7 @@ static int diff_setup_opt = 0;
static int diff_score_opt = 0;
static const char *pickaxe = NULL;
static int pickaxe_opts = 0;
+static int diff_break_opt = -1;
static const char *header = NULL;
static const char *header_prefix = "";
@@ -263,7 +264,8 @@ static int call_diff_flush(void)
{
diffcore_std(0,
detect_rename, diff_score_opt,
- pickaxe, pickaxe_opts);
+ pickaxe, pickaxe_opts,
+ diff_break_opt);
if (diff_queue_is_empty()) {
diff_flush(DIFF_FORMAT_NO_OUTPUT, 0);
return 0;
@@ -523,6 +525,10 @@ int main(int argc, const char **argv)
diff_score_opt = diff_scoreopt_parse(arg);
continue;
}
+ if (!strncmp(arg, "-B", 2)) {
+ diff_break_opt = diff_scoreopt_parse(arg);
+ continue;
+ }
if (!strcmp(arg, "-z")) {
diff_output_format = DIFF_FORMAT_MACHINE;
continue;
View
@@ -603,6 +603,7 @@ struct diff_filepair *diff_queue(struct diff_queue_struct *queue,
dp->two = two;
dp->score = 0;
dp->source_stays = 0;
+ dp->broken_pair = 0;
diff_q(queue, dp);
return dp;
}
@@ -637,6 +638,16 @@ static void diff_flush_raw(struct diff_filepair *p,
sprintf(status, "%c%03d", p->status,
(int)(0.5 + p->score * 100.0/MAX_SCORE));
break;
+ case 'N': case 'D':
+ two_paths = 0;
+ if (p->score)
+ sprintf(status, "%c%03d", p->status,
+ (int)(0.5 + p->score * 100.0/MAX_SCORE));
+ else {
+ status[0] = p->status;
+ status[1] = 0;
+ }
+ break;
default:
two_paths = 0;
status[0] = p->status;
@@ -760,8 +771,9 @@ void diff_debug_filepair(const struct diff_filepair *p, int i)
{
diff_debug_filespec(p->one, i, "one");
diff_debug_filespec(p->two, i, "two");
- fprintf(stderr, "score %d, status %c source_stays %d\n",
- p->score, p->status ? : '?', p->source_stays);
+ fprintf(stderr, "score %d, status %c stays %d broken %d\n",
+ p->score, p->status ? : '?',
+ p->source_stays, p->broken_pair);
}
void diff_debug_queue(const char *msg, struct diff_queue_struct *q)
@@ -875,10 +887,13 @@ void diff_flush(int diff_output_style, int resolve_rename_copy)
void diffcore_std(const char **paths,
int detect_rename, int rename_score,
- const char *pickaxe, int pickaxe_opts)
+ const char *pickaxe, int pickaxe_opts,
+ int break_opt)
{
if (paths && paths[0])
diffcore_pathspec(paths);
+ if (0 <= break_opt)
+ diffcore_break(break_opt);
if (detect_rename)
diffcore_rename(detect_rename, rename_score);
if (pickaxe)
View
@@ -43,9 +43,12 @@ extern void diffcore_pickaxe(const char *needle, int opts);
extern void diffcore_pathspec(const char **pathspec);
+extern void diffcore_break(int);
+
extern void diffcore_std(const char **paths,
int detect_rename, int rename_score,
- const char *pickaxe, int pickaxe_opts);
+ const char *pickaxe, int pickaxe_opts,
+ int break_opt);
extern int diff_queue_is_empty(void);
View
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2005 Junio C Hamano
+ */
+#include "cache.h"
+#include "diff.h"
+#include "diffcore.h"
+#include "delta.h"
+#include "count-delta.h"
+
+static int very_different(struct diff_filespec *src,
+			  struct diff_filespec *dst,
+			  int min_score)
+{
+	/* dst is recorded as a modification of src.  Are they so
+	 * different that we are better off recording this as a pair
+	 * of delete and create?  min_score is the minimum amount of
+	 * new material that must exist in the dst and not in src for
+	 * the pair to be considered a complete rewrite, and recommended
+	 * to be set to a very high value, 99% or so.
+	 *
+	 * The value we return represents the amount of new material
+	 * that is in dst and not in src, on a scale of 0..MAX_SCORE.
+	 * We return 0 when we do not want to get the filepair broken;
+	 * that covers non-regular files, failure to load either side,
+	 * two empty files, and failure to compute or interpret the
+	 * delta.
+	 */
+	void *delta;
+	unsigned long delta_size, base_size;
+
+	if (!S_ISREG(src->mode) || !S_ISREG(dst->mode))
+		return 0; /* leave symlink rename alone */
+
+	/* First pass only needs the sizes, not the contents. */
+	if (diff_populate_filespec(src, 1) || diff_populate_filespec(dst, 1))
+		return 0; /* error but caught downstream */
+
+	delta_size = ((src->size < dst->size) ?
+		      (dst->size - src->size) : (src->size - dst->size));
+
+	/* Notice that we use max of src and dst as the base size,
+	 * unlike rename similarity detection.  This is so that we do
+	 * not mistake a large addition as a complete rewrite.
+	 */
+	base_size = ((src->size < dst->size) ? dst->size : src->size);
+
+	/* Both sides are empty: there is no content that could have
+	 * been rewritten, and the final division by base_size below
+	 * would divide by zero.
+	 */
+	if (!base_size)
+		return 0;
+
+	/*
+	 * If file size difference is too big compared to the
+	 * base_size, we declare this a complete rewrite.
+	 */
+	if (base_size * min_score < delta_size * MAX_SCORE)
+		return MAX_SCORE;
+
+	/* Second pass: we really need the contents now. */
+	if (diff_populate_filespec(src, 0) || diff_populate_filespec(dst, 0))
+		return 0; /* error but caught downstream */
+
+	delta = diff_delta(src->data, src->size,
+			   dst->data, dst->size,
+			   &delta_size);
+
+	/* NOTE(review): presumably diff_delta() reports failure by
+	 * returning NULL -- confirm against its implementation.  Do
+	 * not hand a NULL delta to count_delta().
+	 */
+	if (!delta)
+		return 0;
+
+	/* A delta that has a lot of literal additions would have
+	 * big delta_size no matter what else it does.
+	 */
+	if (base_size * min_score < delta_size * MAX_SCORE) {
+		free(delta); /* do not leak the delta on this early exit */
+		return MAX_SCORE;
+	}
+
+	/* Estimate the edit size by interpreting delta. */
+	delta_size = count_delta(delta, delta_size);
+	free(delta);
+	if (delta_size == UINT_MAX)
+		return 0; /* error in delta computation */
+
+	if (base_size < delta_size)
+		return MAX_SCORE;
+
+	return delta_size * MAX_SCORE / base_size;
+}
+
+void diffcore_break(int min_score)
+{
+	/* Rebuild the queued diff, replacing every in-place edit that
+	 * very_different() scores at min_score or above with a
+	 * deletion filepair followed by a creation filepair.  Every
+	 * other filepair is carried over untouched, in its original
+	 * order.  A min_score of 0 selects DEFAULT_BREAK_SCORE.
+	 */
+	struct diff_queue_struct *inq = &diff_queued_diff;
+	struct diff_queue_struct result;
+	int idx;
+
+	result.queue = NULL;
+	result.nr = 0;
+	result.alloc = 0;
+
+	if (min_score == 0)
+		min_score = DEFAULT_BREAK_SCORE;
+
+	for (idx = 0; idx < inq->nr; idx++) {
+		struct diff_filepair *pair = inq->queue[idx];
+		struct diff_filepair *half;
+		int rewrite_score;
+
+		/* Only an in-place edit of a non directory is a
+		 * candidate; anything else passes straight through.
+		 */
+		if (!DIFF_FILE_VALID(pair->one) || !DIFF_FILE_VALID(pair->two) ||
+		    S_ISDIR(pair->one->mode) || S_ISDIR(pair->two->mode) ||
+		    strcmp(pair->one->path, pair->two->path) != 0) {
+			diff_q(&result, pair);
+			continue;
+		}
+
+		rewrite_score = very_different(pair->one, pair->two, min_score);
+		if (rewrite_score < min_score) {
+			/* Not different enough; keep it as a modification. */
+			diff_q(&result, pair);
+			continue;
+		}
+
+		/* First half: the old file goes away. */
+		half = diff_queue(&result, pair->one,
+				  alloc_filespec(pair->one->path));
+		half->score = rewrite_score;
+		half->broken_pair = 1;
+
+		/* Second half: the new file appears. */
+		half = diff_queue(&result,
+				  alloc_filespec(pair->two->path), pair->two);
+		half->score = rewrite_score;
+		half->broken_pair = 1;
+
+		/* Release only the pair shell, not diff_free_filepair():
+		 * its one and two now belong to the two new filepairs.
+		 */
+		free(pair);
+	}
+
+	/* The old array of pointers is no longer needed; install ours. */
+	free(inq->queue);
+	*inq = result;
+}
Oops, something went wrong.

0 comments on commit f345b0a

Please sign in to comment.